diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index 2587e0fe3..a0682abe7 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -10,7 +10,6 @@ jobs:
   build:
     uses: huggingface/doc-builder/.github/workflows/build_main_documentation.yml@main
     with:
-      repo_owner: xenova
       commit_sha: ${{ github.sha }}
       package: transformers.js
       path_to_docs: transformers.js/docs/source
diff --git a/.github/workflows/pr-documentation.yml b/.github/workflows/pr-documentation.yml
index 5ac60b4fb..0e6415b4d 100644
--- a/.github/workflows/pr-documentation.yml
+++ b/.github/workflows/pr-documentation.yml
@@ -11,7 +11,6 @@ jobs:
   build:
     uses: huggingface/doc-builder/.github/workflows/build_pr_documentation.yml@main
     with:
-      repo_owner: xenova
       commit_sha: ${{ github.sha }}
       pr_number: ${{ github.event.number }}
       package: transformers.js
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 11427c54b..3b87f8b39 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -7,17 +7,20 @@ on:
   pull_request:
     branches:
       - main
-
-env:
-  TESTING_REMOTELY: true
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - ready_for_review
 
 jobs:
   build:
+    if: github.event.pull_request.draft == false
     runs-on: ubuntu-latest
 
     strategy:
       matrix:
-        node-version: [18.x, latest, node]
+        node-version: [18, 20, 22]
 
     steps:
       - uses: actions/checkout@v4
@@ -27,11 +30,9 @@ jobs:
           node-version: ${{ matrix.node-version }}
       - run: npm ci
       - run: npm run build
-      - run: pip install -r tests/requirements.txt
       
       # Setup the testing environment
-      - run: npm run generate-tests
-      - run: git lfs install && GIT_CLONE_PROTECTION_ACTIVE=false git clone https://huggingface.co/Xenova/t5-small ./models/t5-small
+      - run: git lfs install && GIT_CLONE_PROTECTION_ACTIVE=false git clone https://huggingface.co/hf-internal-testing/tiny-random-T5ForConditionalGeneration ./models/hf-internal-testing/tiny-random-T5ForConditionalGeneration
 
       # Actually run tests
       - run: npm run test
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 000000000..bd1927ab2
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1,8 @@
+# Ignore artifacts:
+.github
+dist
+docs
+examples
+scripts
+types
+*.md
diff --git a/.prettierrc b/.prettierrc
new file mode 100644
index 000000000..57d5ce89a
--- /dev/null
+++ b/.prettierrc
@@ -0,0 +1,10 @@
+{
+    "overrides": [
+        {
+            "files": ["tests/**/*.js"],
+            "options": {
+              "printWidth": 10000000
+            }
+        }
+    ]
+}
diff --git a/README.md b/README.md
index 52e449516..49776b05d 100644
--- a/README.md
+++ b/README.md
@@ -3,19 +3,29 @@
 <p align="center">
     <br/>
     <picture> 
-        <source media="(prefers-color-scheme: dark)" srcset="https://github.com/xenova/transformers.js/assets/26504141/bd047e0f-aca9-4ff7-ba07-c7ca55442bc4" width="500" style="max-width: 100%;">
-        <source media="(prefers-color-scheme: light)" srcset="https://github.com/xenova/transformers.js/assets/26504141/84a5dc78-f4ea-43f4-96f2-b8c791f30a8e" width="500" style="max-width: 100%;">
-        <img alt="transformers.js javascript library logo" src="https://github.com/xenova/transformers.js/assets/26504141/84a5dc78-f4ea-43f4-96f2-b8c791f30a8e" width="500" style="max-width: 100%;">
+        <source media="(prefers-color-scheme: dark)" srcset="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/transformersjs-dark.svg" width="500" style="max-width: 100%;">
+        <source media="(prefers-color-scheme: light)" srcset="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/transformersjs-light.svg" width="500" style="max-width: 100%;">
+        <img alt="transformers.js javascript library logo" src="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/transformersjs-light.svg" width="500" style="max-width: 100%;">
     </picture>
     <br/>
 </p>
 
 <p align="center">
-    <a href="https://www.npmjs.com/package/@xenova/transformers"><img alt="NPM" src="https://img.shields.io/npm/v/@xenova/transformers"></a>
-    <a href="https://www.npmjs.com/package/@xenova/transformers"><img alt="NPM Downloads" src="https://img.shields.io/npm/dw/@xenova/transformers"></a>
-    <a href="https://www.jsdelivr.com/package/npm/@xenova/transformers"><img alt="jsDelivr Hits" src="https://img.shields.io/jsdelivr/npm/hw/@xenova/transformers"></a>
-    <a href="https://github.com/xenova/transformers.js/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/github/license/xenova/transformers.js?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers.js/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers.js/index.svg?down_color=red&down_message=offline&up_message=online"></a>
+    <a href="https://www.npmjs.com/package/@huggingface/transformers">
+        <img alt="NPM" src="https://img.shields.io/npm/v/@huggingface/transformers">
+    </a>
+    <a href="https://www.npmjs.com/package/@huggingface/transformers">
+        <img alt="NPM Downloads" src="https://img.shields.io/npm/dw/@huggingface/transformers">
+    </a>
+    <a href="https://www.jsdelivr.com/package/npm/@huggingface/transformers">
+        <img alt="jsDelivr Hits" src="https://img.shields.io/jsdelivr/npm/hw/@huggingface/transformers">
+    </a>
+    <a href="https://github.com/huggingface/transformers.js/blob/main/LICENSE">
+        <img alt="License" src="https://img.shields.io/github/license/huggingface/transformers.js?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers.js/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers.js/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
 </p>
 
 
@@ -23,9 +33,9 @@ State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in
 
 Transformers.js is designed to be functionally equivalent to Hugging Face's [transformers](https://github.com/huggingface/transformers) python library, meaning you can run the same pretrained models using a very similar API. These models support common tasks in different modalities, such as:
   - 📝 **Natural Language Processing**: text classification, named entity recognition, question answering, language modeling, summarization, translation, multiple choice, and text generation.
-  - 🖼️ **Computer Vision**: image classification, object detection, and segmentation.
-  - 🗣️ **Audio**: automatic speech recognition and audio classification.
-  - 🐙 **Multimodal**: zero-shot image classification.
+  - 🖼️ **Computer Vision**: image classification, object detection, segmentation, and depth estimation.
+  - 🗣️ **Audio**: automatic speech recognition, audio classification, and text-to-speech.
+  - 🐙 **Multimodal**: embeddings, zero-shot audio classification, zero-shot image classification, and zero-shot object detection.
 
 Transformers.js uses [ONNX Runtime](https://onnxruntime.ai/) to run models in the browser. The best part about it, is that you can easily [convert](#convert-your-models-to-onnx) your pretrained PyTorch, TensorFlow, or JAX models to ONNX using [🤗 Optimum](https://github.com/huggingface/optimum#onnx--onnx-runtime). 
 
@@ -59,7 +69,7 @@ out = pipe('I love transformers!')
 <td>
 
 ```javascript
-import { pipeline } from '@xenova/transformers';
+import { pipeline } from '@huggingface/transformers';
 
 // Allocate a pipeline for sentiment-analysis
 let pipe = await pipeline('sentiment-analysis');
@@ -83,15 +93,15 @@ let pipe = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-u
 ## Installation
 
 
-To install via [NPM](https://www.npmjs.com/package/@xenova/transformers), run:
+To install via [NPM](https://www.npmjs.com/package/@huggingface/transformers), run:
 ```bash
-npm i @xenova/transformers
+npm i @huggingface/transformers
 ```
 
 Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN or static hosting. For example, using [ES Modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules), you can import the library with:
 ```html
 <script type="module">
-    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
+    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0';
 </script>
 ```
 
@@ -104,18 +114,18 @@ Want to jump straight in? Get started with one of our sample applications/templa
 |-------------------|----------------------------------|-------------------------------|
 | Whisper Web       | Speech recognition w/ Whisper    | [code](https://github.com/xenova/whisper-web), [demo](https://huggingface.co/spaces/Xenova/whisper-web) |
 | Doodle Dash       | Real-time sketch-recognition game | [blog](https://huggingface.co/blog/ml-web-games), [code](https://github.com/xenova/doodle-dash), [demo](https://huggingface.co/spaces/Xenova/doodle-dash) |
-| Code Playground   | In-browser code completion website | [code](https://github.com/xenova/transformers.js/tree/main/examples/code-completion/), [demo](https://huggingface.co/spaces/Xenova/ai-code-playground) |
-| Semantic Image Search (client-side) | Search for images with text | [code](https://github.com/xenova/transformers.js/tree/main/examples/semantic-image-search-client/), [demo](https://huggingface.co/spaces/Xenova/semantic-image-search-client) |
-| Semantic Image Search (server-side) | Search for images with text (Supabase) | [code](https://github.com/xenova/transformers.js/tree/main/examples/semantic-image-search/), [demo](https://huggingface.co/spaces/Xenova/semantic-image-search) |
-| Vanilla JavaScript | In-browser object detection     | [video](https://scrimba.com/scrim/cKm9bDAg), [code](https://github.com/xenova/transformers.js/tree/main/examples/vanilla-js/), [demo](https://huggingface.co/spaces/Scrimba/vanilla-js-object-detector) |
-| React             | Multilingual translation website | [code](https://github.com/xenova/transformers.js/tree/main/examples/react-translator/), [demo](https://huggingface.co/spaces/Xenova/react-translator) |
-| Text to speech (client-side) | In-browser speech synthesis | [code](https://github.com/xenova/transformers.js/tree/main/examples/text-to-speech-client/), [demo](https://huggingface.co/spaces/Xenova/text-to-speech-client) |
-| Browser extension | Text classification extension    | [code](https://github.com/xenova/transformers.js/tree/main/examples/extension/) |
-| Electron          | Text classification application  | [code](https://github.com/xenova/transformers.js/tree/main/examples/electron/)  |
-| Next.js (client-side) | Sentiment analysis (in-browser inference) | [code](https://github.com/xenova/transformers.js/tree/main/examples/next-client/), [demo](https://huggingface.co/spaces/Xenova/next-example-app) |
-| Next.js (server-side) | Sentiment analysis (Node.js inference) | [code](https://github.com/xenova/transformers.js/tree/main/examples/next-server/), [demo](https://huggingface.co/spaces/Xenova/next-server-example-app) |
-| Node.js           | Sentiment analysis API           | [code](https://github.com/xenova/transformers.js/tree/main/examples/node/)      |
-| Demo site         | A collection of demos | [code](https://github.com/xenova/transformers.js/tree/main/examples/demo-site/), [demo](https://xenova.github.io/transformers.js/) |
+| Code Playground   | In-browser code completion website | [code](https://github.com/huggingface/transformers.js/tree/main/examples/code-completion/), [demo](https://huggingface.co/spaces/Xenova/ai-code-playground) |
+| Semantic Image Search (client-side) | Search for images with text | [code](https://github.com/huggingface/transformers.js/tree/main/examples/semantic-image-search-client/), [demo](https://huggingface.co/spaces/Xenova/semantic-image-search-client) |
+| Semantic Image Search (server-side) | Search for images with text (Supabase) | [code](https://github.com/huggingface/transformers.js/tree/main/examples/semantic-image-search/), [demo](https://huggingface.co/spaces/Xenova/semantic-image-search) |
+| Vanilla JavaScript | In-browser object detection     | [video](https://scrimba.com/scrim/cKm9bDAg), [code](https://github.com/huggingface/transformers.js/tree/main/examples/vanilla-js/), [demo](https://huggingface.co/spaces/Scrimba/vanilla-js-object-detector) |
+| React             | Multilingual translation website | [code](https://github.com/huggingface/transformers.js/tree/main/examples/react-translator/), [demo](https://huggingface.co/spaces/Xenova/react-translator) |
+| Text to speech (client-side) | In-browser speech synthesis | [code](https://github.com/huggingface/transformers.js/tree/main/examples/text-to-speech-client/), [demo](https://huggingface.co/spaces/Xenova/text-to-speech-client) |
+| Browser extension | Text classification extension    | [code](https://github.com/huggingface/transformers.js/tree/main/examples/extension/) |
+| Electron          | Text classification application  | [code](https://github.com/huggingface/transformers.js/tree/main/examples/electron/)  |
+| Next.js (client-side) | Sentiment analysis (in-browser inference) | [code](https://github.com/huggingface/transformers.js/tree/main/examples/next-client/), [demo](https://huggingface.co/spaces/Xenova/next-example-app) |
+| Next.js (server-side) | Sentiment analysis (Node.js inference) | [code](https://github.com/huggingface/transformers.js/tree/main/examples/next-server/), [demo](https://huggingface.co/spaces/Xenova/next-server-example-app) |
+| Node.js           | Sentiment analysis API           | [code](https://github.com/huggingface/transformers.js/tree/main/examples/node/)      |
+| Demo site         | A collection of demos | [code](https://github.com/huggingface/transformers.js/tree/main/examples/demo-site/), [demo](https://xenova.github.io/transformers.js/) |
 
 Check out the Transformers.js [template](https://huggingface.co/new-space?template=static-templates%2Ftransformers.js) on Hugging Face to get started in one click!
 
@@ -124,13 +134,12 @@ Check out the Transformers.js [template](https://huggingface.co/new-space?templa
 
 
 
-By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2/dist/), which should work out-of-the-box. You can customize this as follows:
-
+By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0/dist/), which should work out-of-the-box. You can customize this as follows:
 
 ### Settings
 
 ```javascript
-import { env } from '@xenova/transformers';
+import { env } from '@huggingface/transformers';
 
 // Specify a custom location for models (defaults to '/models/').
 env.localModelPath = '/path/to/models/';
@@ -146,7 +155,7 @@ For a full list of available settings, check out the [API Reference](https://hug
 
 ### Convert your models to ONNX
 
-We recommend using our [conversion script](https://github.com/xenova/transformers.js/blob/main/scripts/convert.py) to convert your PyTorch, TensorFlow, or JAX models to ONNX in a single command. Behind the scenes, it uses [🤗 Optimum](https://huggingface.co/docs/optimum) to perform conversion and quantization of your model.
+We recommend using our [conversion script](https://github.com/huggingface/transformers.js/blob/main/scripts/convert.py) to convert your PyTorch, TensorFlow, or JAX models to ONNX in a single command. Behind the scenes, it uses [🤗 Optimum](https://huggingface.co/docs/optimum) to perform conversion and quantization of your model.
 
 ```bash
 python -m scripts.convert --quantize --model_id <model_name_or_path>
@@ -176,7 +185,7 @@ For the full list of supported architectures, see the [Optimum documentation](ht
 
 Here is the list of all tasks and architectures currently supported by Transformers.js.
 If you don't see your task/model listed here or it is not yet supported, feel free
-to open up a feature request [here](https://github.com/xenova/transformers.js/issues/new/choose).
+to open up a feature request [here](https://github.com/huggingface/transformers.js/issues/new/choose).
 
 To find compatible models on the Hub, select the "transformers.js" library tag in the filter menu (or visit [this link](https://huggingface.co/models?library=transformers.js)).
 You can refine your search by selecting the task you're interested in (e.g., [text-classification](https://huggingface.co/models?pipeline_tag=text-classification&library=transformers.js)).
@@ -271,6 +280,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te
 1. **[CLIPSeg](https://huggingface.co/docs/transformers/model_doc/clipseg)** (from University of Göttingen) released with the paper [Image Segmentation Using Text and Image Prompts](https://arxiv.org/abs/2112.10003) by Timo Lüddecke and Alexander Ecker.
 1. **[CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen)** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis](https://arxiv.org/abs/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
 1. **[CodeLlama](https://huggingface.co/docs/transformers/model_doc/llama_code)** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
+1. **[Cohere](https://huggingface.co/docs/transformers/main/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
 1. **[ConvBERT](https://huggingface.co/docs/transformers/model_doc/convbert)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
 1. **[ConvNeXT](https://huggingface.co/docs/transformers/model_doc/convnext)** (from Facebook AI) released with the paper [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
 1. **[ConvNeXTV2](https://huggingface.co/docs/transformers/model_doc/convnextv2)** (from Facebook AI) released with the paper [ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders](https://arxiv.org/abs/2301.00808) by Sanghyun Woo, Shoubhik Debnath, Ronghang Hu, Xinlei Chen, Zhuang Liu, In So Kweon, Saining Xie.
@@ -279,6 +289,7 @@ You can refine your search by selecting the task you're interested in (e.g., [te
 1. **[Decision Transformer](https://huggingface.co/docs/transformers/model_doc/decision_transformer)** (from Berkeley/Facebook/Google) released with the paper [Decision Transformer: Reinforcement Learning via Sequence Modeling](https://arxiv.org/abs/2106.01345) by Lili Chen, Kevin Lu, Aravind Rajeswaran, Kimin Lee, Aditya Grover, Michael Laskin, Pieter Abbeel, Aravind Srinivas, Igor Mordatch.
 1. **[DeiT](https://huggingface.co/docs/transformers/model_doc/deit)** (from Facebook) released with the paper [Training data-efficient image transformers & distillation through attention](https://arxiv.org/abs/2012.12877) by Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, Hervé Jégou.
 1. **[Depth Anything](https://huggingface.co/docs/transformers/main/model_doc/depth_anything)** (from University of Hong Kong and TikTok) released with the paper [Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data](https://arxiv.org/abs/2401.10891) by Lihe Yang, Bingyi Kang, Zilong Huang, Xiaogang Xu, Jiashi Feng, Hengshuang Zhao.
+1. **Depth Pro** (from Apple) released with the paper [Depth Pro: Sharp Monocular Metric Depth in Less Than a Second](https://arxiv.org/abs/2410.02073) by Aleksei Bochkovskii, Amaël Delaunoy, Hugo Germain, Marcel Santos, Yichao Zhou, Stephan R. Richter, Vladlen Koltun.
 1. **[DETR](https://huggingface.co/docs/transformers/model_doc/detr)** (from Facebook) released with the paper [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) by Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, Sergey Zagoruyko.
 1. **[DINOv2](https://huggingface.co/docs/transformers/model_doc/dinov2)** (from Meta AI) released with the paper [DINOv2: Learning Robust Visual Features without Supervision](https://arxiv.org/abs/2304.07193) by Maxime Oquab, Timothée Darcet, Théo Moutakanni, Huy Vo, Marc Szafraniec, Vasil Khalidov, Pierre Fernandez, Daniel Haziza, Francisco Massa, Alaaeldin El-Nouby, Mahmoud Assran, Nicolas Ballas, Wojciech Galuba, Russell Howes, Po-Yao Huang, Shang-Wen Li, Ishan Misra, Michael Rabbat, Vasu Sharma, Gabriel Synnaeve, Hu Xu, Hervé Jegou, Julien Mairal, Patrick Labatut, Armand Joulin, Piotr Bojanowski.
 1. **[DistilBERT](https://huggingface.co/docs/transformers/model_doc/distilbert)** (from HuggingFace), released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation) and a German version of DistilBERT.
@@ -291,39 +302,61 @@ You can refine your search by selecting the task you're interested in (e.g., [te
 1. **[Falcon](https://huggingface.co/docs/transformers/model_doc/falcon)** (from Technology Innovation Institute) by Almazrouei, Ebtesam and Alobeidli, Hamza and Alshamsi, Abdulaziz and Cappelli, Alessandro and Cojocaru, Ruxandra and Debbah, Merouane and Goffinet, Etienne and Heslow, Daniel and Launay, Julien and Malartic, Quentin and Noune, Badreddine and Pannier, Baptiste and Penedo, Guilherme.
 1. **FastViT** (from Apple) released with the paper [FastViT: A Fast Hybrid Vision Transformer using Structural Reparameterization](https://arxiv.org/abs/2303.14189) by Pavan Kumar Anasosalu Vasu, James Gabriel, Jeff Zhu, Oncel Tuzel and Anurag Ranjan.
 1. **[FLAN-T5](https://huggingface.co/docs/transformers/model_doc/flan-t5)** (from Google AI) released in the repository [google-research/t5x](https://github.com/google-research/t5x/blob/main/docs/models.md#flan-t5-checkpoints) by Hyung Won Chung, Le Hou, Shayne Longpre, Barret Zoph, Yi Tay, William Fedus, Eric Li, Xuezhi Wang, Mostafa Dehghani, Siddhartha Brahma, Albert Webson, Shixiang Shane Gu, Zhuyun Dai, Mirac Suzgun, Xinyun Chen, Aakanksha Chowdhery, Sharan Narang, Gaurav Mishra, Adams Yu, Vincent Zhao, Yanping Huang, Andrew Dai, Hongkun Yu, Slav Petrov, Ed H. Chi, Jeff Dean, Jacob Devlin, Adam Roberts, Denny Zhou, Quoc V. Le, and Jason Wei
+1. **Florence2** (from Microsoft) released with the paper [Florence-2: Advancing a Unified Representation for a Variety of Vision Tasks](https://arxiv.org/abs/2311.06242) by Bin Xiao, Haiping Wu, Weijian Xu, Xiyang Dai, Houdong Hu, Yumao Lu, Michael Zeng, Ce Liu, Lu Yuan.
+1. **[Gemma](https://huggingface.co/docs/transformers/main/model_doc/gemma)** (from Google) released with the paper [Gemma: Open Models Based on Gemini Technology and Research](https://blog.google/technology/developers/gemma-open-models/) by the Gemma Google team.
+1. **[Gemma2](https://huggingface.co/docs/transformers/main/model_doc/gemma2)** (from Google) released with the paper [Gemma2: Open Models Based on Gemini Technology and Research](https://blog.google/technology/developers/google-gemma-2/) by the Gemma Google team.
 1. **[GLPN](https://huggingface.co/docs/transformers/model_doc/glpn)** (from KAIST) released with the paper [Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth](https://arxiv.org/abs/2201.07436) by Doyeon Kim, Woonghyun Ga, Pyungwhan Ahn, Donggyu Joo, Sehwan Chun, Junmo Kim.
 1. **[GPT Neo](https://huggingface.co/docs/transformers/model_doc/gpt_neo)** (from EleutherAI) released in the repository [EleutherAI/gpt-neo](https://github.com/EleutherAI/gpt-neo) by Sid Black, Stella Biderman, Leo Gao, Phil Wang and Connor Leahy.
 1. **[GPT NeoX](https://huggingface.co/docs/transformers/model_doc/gpt_neox)** (from EleutherAI) released with the paper [GPT-NeoX-20B: An Open-Source Autoregressive Language Model](https://arxiv.org/abs/2204.06745) by Sid Black, Stella Biderman, Eric Hallahan, Quentin Anthony, Leo Gao, Laurence Golding, Horace He, Connor Leahy, Kyle McDonell, Jason Phang, Michael Pieler, USVSN Sai Prashanth, Shivanshu Purohit, Laria Reynolds, Jonathan Tow, Ben Wang, Samuel Weinbach
 1. **[GPT-2](https://huggingface.co/docs/transformers/model_doc/gpt2)** (from OpenAI) released with the paper [Language Models are Unsupervised Multitask Learners](https://blog.openai.com/better-language-models/) by Alec Radford*, Jeffrey Wu*, Rewon Child, David Luan, Dario Amodei** and Ilya Sutskever**.
 1. **[GPT-J](https://huggingface.co/docs/transformers/model_doc/gptj)** (from EleutherAI) released in the repository [kingoflolz/mesh-transformer-jax](https://github.com/kingoflolz/mesh-transformer-jax/) by Ben Wang and Aran Komatsuzaki.
 1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
+1. **[Granite](https://huggingface.co/docs/transformers/main/model_doc/granite)** (from IBM) released with the paper [Power Scheduler: A Batch Size and Token Number Agnostic Learning Rate Scheduler](https://arxiv.org/abs/2408.13359) by Yikang Shen, Matthew Stallone, Mayank Mishra, Gaoyuan Zhang, Shawn Tan, Aditya Prasad, Adriana Meza Soria, David D. Cox, Rameswar Panda.
+1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
 1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
+1. **[Hiera](https://huggingface.co/docs/transformers/model_doc/hiera)** (from Meta) released with the paper [Hiera: A Hierarchical Vision Transformer without the Bells-and-Whistles](https://arxiv.org/pdf/2306.00989) by Chaitanya Ryali, Yuan-Ting Hu, Daniel Bolya, Chen Wei, Haoqi Fan, Po-Yao Huang, Vaibhav Aggarwal, Arkabandhu Chowdhury, Omid Poursaeed, Judy Hoffman, Jitendra Malik, Yanghao Li, Christoph Feichtenhofer.
 1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
+1. **JAIS** (from Core42) released with the paper [Jais and Jais-chat: Arabic-Centric Foundation and Instruction-Tuned Open Generative Large Language Models](https://arxiv.org/pdf/2308.16149) by Neha Sengupta, Sunil Kumar Sahu, Bokang Jia, Satheesh Katipomu, Haonan Li, Fajri Koto, William Marshall, Gurpreet Gosal, Cynthia Liu, Zhiming Chen, Osama Mohammed Afzal, Samta Kamboj, Onkar Pandit, Rahul Pal, Lalit Pradhan, Zain Muhammad Mujahid, Massa Baali, Xudong Han, Sondos Mahmoud Bsharat, Alham Fikri Aji, Zhiqiang Shen, Zhengzhong Liu, Natalia Vassilieva, Joel Hestness, Andy Hock, Andrew Feldman, Jonathan Lee, Andrew Jackson, Hector Xuguang Ren, Preslav Nakov, Timothy Baldwin, Eric Xing.
 1. **[LongT5](https://huggingface.co/docs/transformers/model_doc/longt5)** (from Google AI) released with the paper [LongT5: Efficient Text-To-Text Transformer for Long Sequences](https://arxiv.org/abs/2112.07916) by Mandy Guo, Joshua Ainslie, David Uthus, Santiago Ontanon, Jianmo Ni, Yun-Hsuan Sung, Yinfei Yang.
 1. **[LLaMA](https://huggingface.co/docs/transformers/model_doc/llama)** (from The FAIR team of Meta AI) released with the paper [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) by Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timothée Lacroix, Baptiste Rozière, Naman Goyal, Eric Hambro, Faisal Azhar, Aurelien Rodriguez, Armand Joulin, Edouard Grave, Guillaume Lample.
 1. **[Llama2](https://huggingface.co/docs/transformers/model_doc/llama2)** (from The FAIR team of Meta AI) released with the paper [Llama2: Open Foundation and Fine-Tuned Chat Models](https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/XXX) by Hugo Touvron, Louis Martin, Kevin Stone, Peter Albert, Amjad Almahairi, Yasmine Babaei, Nikolay Bashlykov, Soumya Batra, Prajjwal Bhargava, Shruti Bhosale, Dan Bikel, Lukas Blecher, Cristian Canton Ferrer, Moya Chen, Guillem Cucurull, David Esiobu, Jude Fernandes, Jeremy Fu, Wenyin Fu, Brian Fuller, Cynthia Gao, Vedanuj Goswami, Naman Goyal, Anthony Hartshorn, Saghar Hosseini, Rui Hou, Hakan Inan, Marcin Kardas, Viktor Kerkez Madian Khabsa, Isabel Kloumann, Artem Korenev, Punit Singh Koura, Marie-Anne Lachaux, Thibaut Lavril, Jenya Lee, Diana Liskovich, Yinghai Lu, Yuning Mao, Xavier Martinet, Todor Mihaylov, Pushka rMishra, Igor Molybog, Yixin Nie, Andrew Poulton, Jeremy Reizenstein, Rashi Rungta, Kalyan Saladi, Alan Schelten, Ruan Silva, Eric Michael Smith, Ranjan Subramanian, Xiaoqing EllenTan, Binh Tang, Ross Taylor, Adina Williams, Jian Xiang Kuan, Puxin Xu, Zheng Yan, Iliyan Zarov, Yuchen Zhang, Angela Fan, Melanie Kambadur, Sharan Narang, Aurelien Rodriguez, Robert Stojnic, Sergey Edunov, Thomas Scialom.
+1. **[LLaVa](https://huggingface.co/docs/transformers/model_doc/llava)** (from Microsoft Research & University of Wisconsin-Madison) released with the paper [Visual Instruction Tuning](https://arxiv.org/abs/2304.08485) by Haotian Liu, Chunyuan Li, Yuheng Li and Yong Jae Lee.
 1. **[M2M100](https://huggingface.co/docs/transformers/model_doc/m2m_100)** (from Facebook) released with the paper [Beyond English-Centric Multilingual Machine Translation](https://arxiv.org/abs/2010.11125) by Angela Fan, Shruti Bhosale, Holger Schwenk, Zhiyi Ma, Ahmed El-Kishky, Siddharth Goyal, Mandeep Baines, Onur Celebi, Guillaume Wenzek, Vishrav Chaudhary, Naman Goyal, Tom Birch, Vitaliy Liptchinsky, Sergey Edunov, Edouard Grave, Michael Auli, Armand Joulin.
 1. **[MarianMT](https://huggingface.co/docs/transformers/model_doc/marian)** Machine translation models trained using [OPUS](http://opus.nlpl.eu/) data by Jörg Tiedemann. The [Marian Framework](https://marian-nmt.github.io/) is being developed by the Microsoft Translator Team.
+1. **[MaskFormer](https://huggingface.co/docs/transformers/model_doc/maskformer)** (from Meta and UIUC) released with the paper [Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278) by Bowen Cheng, Alexander G. Schwing, Alexander Kirillov.
 1. **[mBART](https://huggingface.co/docs/transformers/model_doc/mbart)** (from Facebook) released with the paper [Multilingual Denoising Pre-training for Neural Machine Translation](https://arxiv.org/abs/2001.08210) by Yinhan Liu, Jiatao Gu, Naman Goyal, Xian Li, Sergey Edunov, Marjan Ghazvininejad, Mike Lewis, Luke Zettlemoyer.
 1. **[mBART-50](https://huggingface.co/docs/transformers/model_doc/mbart)** (from Facebook) released with the paper [Multilingual Translation with Extensible Multilingual Pretraining and Finetuning](https://arxiv.org/abs/2008.00401) by Yuqing Tang, Chau Tran, Xian Li, Peng-Jen Chen, Naman Goyal, Vishrav Chaudhary, Jiatao Gu, Angela Fan.
+1. **[MusicGen](https://huggingface.co/docs/transformers/model_doc/musicgen)** (from Meta) released with the paper [Simple and Controllable Music Generation](https://arxiv.org/abs/2306.05284) by Jade Copet, Felix Kreuk, Itai Gat, Tal Remez, David Kant, Gabriel Synnaeve, Yossi Adi and Alexandre Défossez.
 1. **[Mistral](https://huggingface.co/docs/transformers/model_doc/mistral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
 1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli.
 1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
+1. **MobileCLIP** (from Apple) released with the paper [MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training](https://arxiv.org/abs/2311.17049) by Pavan Kumar Anasosalu Vasu, Hadi Pouransari, Fartash Faghri, Raviteja Vemulapalli, Oncel Tuzel.
+1. **[MobileNetV1](https://huggingface.co/docs/transformers/model_doc/mobilenet_v1)** (from Google Inc.) released with the paper [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861) by Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam.
+1. **[MobileNetV2](https://huggingface.co/docs/transformers/model_doc/mobilenet_v2)** (from Google Inc.) released with the paper [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) by Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen.
+1. **MobileNetV3** (from Google Inc.) released with the paper [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) by Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam.
+1. **MobileNetV4** (from Google Inc.) released with the paper [MobileNetV4 - Universal Models for the Mobile Ecosystem](https://arxiv.org/abs/2404.10518) by Danfeng Qin, Chas Leichner, Manolis Delakis, Marco Fornoni, Shixin Luo, Fan Yang, Weijun Wang, Colby Banbury, Chengxi Ye, Berkin Akin, Vaibhav Aggarwal, Tenghui Zhu, Daniele Moro, Andrew Howard.
 1. **[MobileViT](https://huggingface.co/docs/transformers/model_doc/mobilevit)** (from Apple) released with the paper [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari.
 1. **[MobileViTV2](https://huggingface.co/docs/transformers/model_doc/mobilevitv2)** (from Apple) released with the paper [Separable Self-attention for Mobile Vision Transformers](https://arxiv.org/abs/2206.02680) by Sachin Mehta and Mohammad Rastegari.
+1. **Moondream1** released in the repository [moondream](https://github.com/vikhyat/moondream) by vikhyat.
 1. **[MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://arxiv.org/abs/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu.
 1. **[MPT](https://huggingface.co/docs/transformers/model_doc/mpt)** (from MosaiML) released with the repository [llm-foundry](https://github.com/mosaicml/llm-foundry/) by the MosaicML NLP Team.
 1. **[MT5](https://huggingface.co/docs/transformers/model_doc/mt5)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
 1. **[NLLB](https://huggingface.co/docs/transformers/model_doc/nllb)** (from Meta) released with the paper [No Language Left Behind: Scaling Human-Centered Machine Translation](https://arxiv.org/abs/2207.04672) by the NLLB team.
 1. **[Nougat](https://huggingface.co/docs/transformers/model_doc/nougat)** (from Meta AI) released with the paper [Nougat: Neural Optical Understanding for Academic Documents](https://arxiv.org/abs/2308.13418) by Lukas Blecher, Guillem Cucurull, Thomas Scialom, Robert Stojnic.
+1. **OpenELM** (from Apple) released with the paper [OpenELM: An Efficient Language Model Family with Open-source Training and Inference Framework](https://arxiv.org/abs/2404.14619) by Sachin Mehta, Mohammad Hossein Sekhavat, Qingqing Cao, Maxwell Horton, Yanzi Jin, Chenfan Sun, Iman Mirzadeh, Mahyar Najibi, Dmitry Belenko, Peter Zatloukal, Mohammad Rastegari.
 1. **[OPT](https://huggingface.co/docs/transformers/master/model_doc/opt)** (from Meta AI) released with the paper [OPT: Open Pre-trained Transformer Language Models](https://arxiv.org/abs/2205.01068) by Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen et al.
 1. **[OWL-ViT](https://huggingface.co/docs/transformers/model_doc/owlvit)** (from Google AI) released with the paper [Simple Open-Vocabulary Object Detection with Vision Transformers](https://arxiv.org/abs/2205.06230) by Matthias Minderer, Alexey Gritsenko, Austin Stone, Maxim Neumann, Dirk Weissenborn, Alexey Dosovitskiy, Aravindh Mahendran, Anurag Arnab, Mostafa Dehghani, Zhuoran Shen, Xiao Wang, Xiaohua Zhai, Thomas Kipf, and Neil Houlsby.
 1. **[OWLv2](https://huggingface.co/docs/transformers/model_doc/owlv2)** (from Google AI) released with the paper [Scaling Open-Vocabulary Object Detection](https://arxiv.org/abs/2306.09683) by Matthias Minderer, Alexey Gritsenko, Neil Houlsby.
 1. **[Phi](https://huggingface.co/docs/transformers/main/model_doc/phi)** (from Microsoft) released with the papers - [Textbooks Are All You Need](https://arxiv.org/abs/2306.11644) by Suriya Gunasekar, Yi Zhang, Jyoti Aneja, Caio César Teodoro Mendes, Allie Del Giorno, Sivakanth Gopi, Mojan Javaheripi, Piero Kauffmann, Gustavo de Rosa, Olli Saarikivi, Adil Salim, Shital Shah, Harkirat Singh Behl, Xin Wang, Sébastien Bubeck, Ronen Eldan, Adam Tauman Kalai, Yin Tat Lee and Yuanzhi Li, [Textbooks Are All You Need II: phi-1.5 technical report](https://arxiv.org/abs/2309.05463) by Yuanzhi Li, Sébastien Bubeck, Ronen Eldan, Allie Del Giorno, Suriya Gunasekar and Yin Tat Lee.
+1. **[Phi3](https://huggingface.co/docs/transformers/main/model_doc/phi3)** (from Microsoft) released with the paper [Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone](https://arxiv.org/abs/2404.14219) by Marah Abdin, Sam Ade Jacobs, Ammar Ahmad Awan, Jyoti Aneja, Ahmed Awadallah, Hany Awadalla, Nguyen Bach, Amit Bahree, Arash Bakhtiari, Harkirat Behl, Alon Benhaim, Misha Bilenko, Johan Bjorck, Sébastien Bubeck, Martin Cai, Caio César Teodoro Mendes, Weizhu Chen, Vishrav Chaudhary, Parul Chopra, Allie Del Giorno, Gustavo de Rosa, Matthew Dixon, Ronen Eldan, Dan Iter, Amit Garg, Abhishek Goswami, Suriya Gunasekar, Emman Haider, Junheng Hao, Russell J. Hewett, Jamie Huynh, Mojan Javaheripi, Xin Jin, Piero Kauffmann, Nikos Karampatziakis, Dongwoo Kim, Mahoud Khademi, Lev Kurilenko, James R. Lee, Yin Tat Lee, Yuanzhi Li, Chen Liang, Weishung Liu, Eric Lin, Zeqi Lin, Piyush Madan, Arindam Mitra, Hardik Modi, Anh Nguyen, Brandon Norick, Barun Patra, Daniel Perez-Becker, Thomas Portet, Reid Pryzant, Heyang Qin, Marko Radmilac, Corby Rosset, Sambudha Roy, Olatunji Ruwase, Olli Saarikivi, Amin Saied, Adil Salim, Michael Santacroce, Shital Shah, Ning Shang, Hiteshi Sharma, Xia Song, Masahiro Tanaka, Xin Wang, Rachel Ward, Guanhua Wang, Philipp Witte, Michael Wyatt, Can Xu, Jiahang Xu, Sonali Yadav, Fan Yang, Ziyi Yang, Donghan Yu, Chengruidong Zhang, Cyril Zhang, Jianwen Zhang, Li Lyna Zhang, Yi Zhang, Yue Zhang, Yunan Zhang, Xiren Zhou.
+1. **[PVT](https://huggingface.co/docs/transformers/main/model_doc/pvt)** (from Nanjing University, The University of Hong Kong etc.) released with the paper [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/pdf/2102.12122.pdf) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
+1. **PyAnnote** released in the repository [pyannote/pyannote-audio](https://github.com/pyannote/pyannote-audio) by Hervé Bredin.
 1. **[Qwen2](https://huggingface.co/docs/transformers/model_doc/qwen2)** (from the Qwen team, Alibaba Group) released with the paper [Qwen Technical Report](https://arxiv.org/abs/2309.16609) by Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Kai Dang, Xiaodong Deng, Yang Fan, Wenbin Ge, Yu Han, Fei Huang, Binyuan Hui, Luo Ji, Mei Li, Junyang Lin, Runji Lin, Dayiheng Liu, Gao Liu, Chengqiang Lu, Keming Lu, Jianxin Ma, Rui Men, Xingzhang Ren, Xuancheng Ren, Chuanqi Tan, Sinan Tan, Jianhong Tu, Peng Wang, Shijie Wang, Wei Wang, Shengguang Wu, Benfeng Xu, Jin Xu, An Yang, Hao Yang, Jian Yang, Shusheng Yang, Yang Yao, Bowen Yu, Hongyi Yuan, Zheng Yuan, Jianwei Zhang, Xingxuan Zhang, Yichang Zhang, Zhenru Zhang, Chang Zhou, Jingren Zhou, Xiaohuan Zhou and Tianhang Zhu.
 1. **[ResNet](https://huggingface.co/docs/transformers/model_doc/resnet)** (from Microsoft Research) released with the paper [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) by Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
 1. **[RoBERTa](https://huggingface.co/docs/transformers/model_doc/roberta)** (from Facebook), released together with the paper [RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://arxiv.org/abs/1907.11692) by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov.
 1. **[RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer)** (from ZhuiyiTechnology), released together with the paper [RoFormer: Enhanced Transformer with Rotary Position Embedding](https://arxiv.org/abs/2104.09864) by Jianlin Su and Yu Lu and Shengfeng Pan and Bo Wen and Yunfeng Liu.
+1. **[RT-DETR](https://huggingface.co/docs/transformers/model_doc/rt_detr)** (from Baidu), released together with the paper [DETRs Beat YOLOs on Real-time Object Detection](https://arxiv.org/abs/2304.08069) by Yian Zhao, Wenyu Lv, Shangliang Xu, Jinman Wei, Guanzhong Wang, Qingqing Dang, Yi Liu, Jie Chen.
+1. **Sapiens** (from Meta AI) released with the paper [Sapiens: Foundation for Human Vision Models](https://arxiv.org/pdf/2408.12569) by Rawal Khirodkar, Timur Bagautdinov, Julieta Martinez, Su Zhaoen, Austin James, Peter Selednik, Stuart Anderson, Shunsuke Saito.
 1. **[SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo.
 1. **[Segment Anything](https://huggingface.co/docs/transformers/model_doc/sam)** (from Meta AI) released with the paper [Segment Anything](https://arxiv.org/pdf/2304.02643v1.pdf) by Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alex Berg, Wan-Yen Lo, Piotr Dollar, Ross Girshick.
 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://arxiv.org/abs/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer.
@@ -340,7 +373,9 @@ You can refine your search by selecting the task you're interested in (e.g., [te
 1. **[UniSpeech](https://huggingface.co/docs/transformers/model_doc/unispeech)** (from Microsoft Research) released with the paper [UniSpeech: Unified Speech Representation Learning with Labeled and Unlabeled Data](https://arxiv.org/abs/2101.07597) by Chengyi Wang, Yu Wu, Yao Qian, Kenichi Kumatani, Shujie Liu, Furu Wei, Michael Zeng, Xuedong Huang.
 1. **[UniSpeechSat](https://huggingface.co/docs/transformers/model_doc/unispeech-sat)** (from Microsoft Research) released with the paper [UNISPEECH-SAT: UNIVERSAL SPEECH REPRESENTATION LEARNING WITH SPEAKER AWARE PRE-TRAINING](https://arxiv.org/abs/2110.05752) by Sanyuan Chen, Yu Wu, Chengyi Wang, Zhengyang Chen, Zhuo Chen, Shujie Liu, Jian Wu, Yao Qian, Furu Wei, Jinyu Li, Xiangzhan Yu.
 1. **[Vision Transformer (ViT)](https://huggingface.co/docs/transformers/model_doc/vit)** (from Google AI) released with the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby.
+1. **[ViTMAE](https://huggingface.co/docs/transformers/model_doc/vit_mae)** (from Meta AI) released with the paper [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) by Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Dollár, Ross Girshick.
 1. **[ViTMatte](https://huggingface.co/docs/transformers/model_doc/vitmatte)** (from HUST-VL) released with the paper [ViTMatte: Boosting Image Matting with Pretrained Plain Vision Transformers](https://arxiv.org/abs/2305.15272) by Jingfeng Yao, Xinggang Wang, Shusheng Yang, Baoyuan Wang.
+1. **[ViTMSN](https://huggingface.co/docs/transformers/model_doc/vit_msn)** (from Meta AI) released with the paper [Masked Siamese Networks for Label-Efficient Learning](https://arxiv.org/abs/2204.07141) by Mahmoud Assran, Mathilde Caron, Ishan Misra, Piotr Bojanowski, Florian Bordes, Pascal Vincent, Armand Joulin, Michael Rabbat, Nicolas Ballas.
 1. **[VITS](https://huggingface.co/docs/transformers/model_doc/vits)** (from Kakao Enterprise) released with the paper [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) by Jaehyeon Kim, Jungil Kong, Juhee Son.
 1. **[Wav2Vec2](https://huggingface.co/docs/transformers/model_doc/wav2vec2)** (from Facebook AI) released with the paper [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) by Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli.
 1. **[Wav2Vec2-BERT](https://huggingface.co/docs/transformers/main/model_doc/wav2vec2-bert)** (from Meta AI) released with the paper [Seamless: Multilingual Expressive and Streaming Speech Translation](https://ai.meta.com/research/publications/seamless-multilingual-expressive-and-streaming-speech-translation/) by the Seamless Communication team.
diff --git a/docs/scripts/build_readme.py b/docs/scripts/build_readme.py
index 44faf1a77..611c5b3f6 100644
--- a/docs/scripts/build_readme.py
+++ b/docs/scripts/build_readme.py
@@ -5,19 +5,29 @@
 <p align="center">
     <br/>
     <picture> 
-        <source media="(prefers-color-scheme: dark)" srcset="https://github.com/xenova/transformers.js/assets/26504141/bd047e0f-aca9-4ff7-ba07-c7ca55442bc4" width="500" style="max-width: 100%;">
-        <source media="(prefers-color-scheme: light)" srcset="https://github.com/xenova/transformers.js/assets/26504141/84a5dc78-f4ea-43f4-96f2-b8c791f30a8e" width="500" style="max-width: 100%;">
-        <img alt="transformers.js javascript library logo" src="https://github.com/xenova/transformers.js/assets/26504141/84a5dc78-f4ea-43f4-96f2-b8c791f30a8e" width="500" style="max-width: 100%;">
+        <source media="(prefers-color-scheme: dark)" srcset="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/transformersjs-dark.svg" width="500" style="max-width: 100%;">
+        <source media="(prefers-color-scheme: light)" srcset="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/transformersjs-light.svg" width="500" style="max-width: 100%;">
+        <img alt="transformers.js javascript library logo" src="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/transformersjs-light.svg" width="500" style="max-width: 100%;">
     </picture>
     <br/>
 </p>
 
 <p align="center">
-    <a href="https://www.npmjs.com/package/@xenova/transformers"><img alt="NPM" src="https://img.shields.io/npm/v/@xenova/transformers"></a>
-    <a href="https://www.npmjs.com/package/@xenova/transformers"><img alt="NPM Downloads" src="https://img.shields.io/npm/dw/@xenova/transformers"></a>
-    <a href="https://www.jsdelivr.com/package/npm/@xenova/transformers"><img alt="jsDelivr Hits" src="https://img.shields.io/jsdelivr/npm/hw/@xenova/transformers"></a>
-    <a href="https://github.com/xenova/transformers.js/blob/main/LICENSE"><img alt="License" src="https://img.shields.io/github/license/xenova/transformers.js?color=blue"></a>
-    <a href="https://huggingface.co/docs/transformers.js/index"><img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers.js/index.svg?down_color=red&down_message=offline&up_message=online"></a>
+    <a href="https://www.npmjs.com/package/@huggingface/transformers">
+        <img alt="NPM" src="https://img.shields.io/npm/v/@huggingface/transformers">
+    </a>
+    <a href="https://www.npmjs.com/package/@huggingface/transformers">
+        <img alt="NPM Downloads" src="https://img.shields.io/npm/dw/@huggingface/transformers">
+    </a>
+    <a href="https://www.jsdelivr.com/package/npm/@huggingface/transformers">
+        <img alt="jsDelivr Hits" src="https://img.shields.io/jsdelivr/npm/hw/@huggingface/transformers">
+    </a>
+    <a href="https://github.com/huggingface/transformers.js/blob/main/LICENSE">
+        <img alt="License" src="https://img.shields.io/github/license/huggingface/transformers.js?color=blue">
+    </a>
+    <a href="https://huggingface.co/docs/transformers.js/index">
+        <img alt="Documentation" src="https://img.shields.io/website/http/huggingface.co/docs/transformers.js/index.svg?down_color=red&down_message=offline&up_message=online">
+    </a>
 </p>
 
 {intro}
@@ -42,7 +52,7 @@
 
 Here is the list of all tasks and architectures currently supported by Transformers.js.
 If you don't see your task/model listed here or it is not yet supported, feel free
-to open up a feature request [here](https://github.com/xenova/transformers.js/issues/new/choose).
+to open up a feature request [here](https://github.com/huggingface/transformers.js/issues/new/choose).
 
 To find compatible models on the Hub, select the "transformers.js" library tag in the filter menu (or visit [this link](https://huggingface.co/models?library=transformers.js)).
 You can refine your search by selecting the task you're interested in (e.g., [text-classification](https://huggingface.co/models?pipeline_tag=text-classification&library=transformers.js)).
diff --git a/docs/snippets/0_introduction.snippet b/docs/snippets/0_introduction.snippet
index a0ede3821..d25a0e513 100644
--- a/docs/snippets/0_introduction.snippet
+++ b/docs/snippets/0_introduction.snippet
@@ -3,9 +3,9 @@ State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in
 
 Transformers.js is designed to be functionally equivalent to Hugging Face's [transformers](https://github.com/huggingface/transformers) python library, meaning you can run the same pretrained models using a very similar API. These models support common tasks in different modalities, such as:
   - 📝 **Natural Language Processing**: text classification, named entity recognition, question answering, language modeling, summarization, translation, multiple choice, and text generation.
-  - 🖼️ **Computer Vision**: image classification, object detection, and segmentation.
-  - 🗣️ **Audio**: automatic speech recognition and audio classification.
-  - 🐙 **Multimodal**: zero-shot image classification.
+  - 🖼️ **Computer Vision**: image classification, object detection, segmentation, and depth estimation.
+  - 🗣️ **Audio**: automatic speech recognition, audio classification, and text-to-speech.
+  - 🐙 **Multimodal**: embeddings, zero-shot audio classification, zero-shot image classification, and zero-shot object detection.
 
 Transformers.js uses [ONNX Runtime](https://onnxruntime.ai/) to run models in the browser. The best part about it, is that you can easily [convert](#convert-your-models-to-onnx) your pretrained PyTorch, TensorFlow, or JAX models to ONNX using [🤗 Optimum](https://github.com/huggingface/optimum#onnx--onnx-runtime). 
 
diff --git a/docs/snippets/1_quick-tour.snippet b/docs/snippets/1_quick-tour.snippet
index dec6b341f..2e906a0f1 100644
--- a/docs/snippets/1_quick-tour.snippet
+++ b/docs/snippets/1_quick-tour.snippet
@@ -23,7 +23,7 @@ out = pipe('I love transformers!')
 <td>
 
 ```javascript
-import { pipeline } from '@xenova/transformers';
+import { pipeline } from '@huggingface/transformers';
 
 // Allocate a pipeline for sentiment-analysis
 let pipe = await pipeline('sentiment-analysis');
diff --git a/docs/snippets/2_installation.snippet b/docs/snippets/2_installation.snippet
index 5f739c98f..6c8b6146e 100644
--- a/docs/snippets/2_installation.snippet
+++ b/docs/snippets/2_installation.snippet
@@ -1,12 +1,12 @@
 
-To install via [NPM](https://www.npmjs.com/package/@xenova/transformers), run:
+To install via [NPM](https://www.npmjs.com/package/@huggingface/transformers), run:
 ```bash
-npm i @xenova/transformers
+npm i @huggingface/transformers
 ```
 
 Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN or static hosting. For example, using [ES Modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules), you can import the library with:
 ```html
 <script type="module">
-    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
+    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0';
 </script>
 ```
diff --git a/docs/snippets/3_examples.snippet b/docs/snippets/3_examples.snippet
index 1ee5cc49a..f8bf7ed1c 100644
--- a/docs/snippets/3_examples.snippet
+++ b/docs/snippets/3_examples.snippet
@@ -4,17 +4,17 @@ Want to jump straight in? Get started with one of our sample applications/templa
 |-------------------|----------------------------------|-------------------------------|
 | Whisper Web       | Speech recognition w/ Whisper    | [code](https://github.com/xenova/whisper-web), [demo](https://huggingface.co/spaces/Xenova/whisper-web) |
 | Doodle Dash       | Real-time sketch-recognition game | [blog](https://huggingface.co/blog/ml-web-games), [code](https://github.com/xenova/doodle-dash), [demo](https://huggingface.co/spaces/Xenova/doodle-dash) |
-| Code Playground   | In-browser code completion website | [code](https://github.com/xenova/transformers.js/tree/main/examples/code-completion/), [demo](https://huggingface.co/spaces/Xenova/ai-code-playground) |
-| Semantic Image Search (client-side) | Search for images with text | [code](https://github.com/xenova/transformers.js/tree/main/examples/semantic-image-search-client/), [demo](https://huggingface.co/spaces/Xenova/semantic-image-search-client) |
-| Semantic Image Search (server-side) | Search for images with text (Supabase) | [code](https://github.com/xenova/transformers.js/tree/main/examples/semantic-image-search/), [demo](https://huggingface.co/spaces/Xenova/semantic-image-search) |
-| Vanilla JavaScript | In-browser object detection     | [video](https://scrimba.com/scrim/cKm9bDAg), [code](https://github.com/xenova/transformers.js/tree/main/examples/vanilla-js/), [demo](https://huggingface.co/spaces/Scrimba/vanilla-js-object-detector) |
-| React             | Multilingual translation website | [code](https://github.com/xenova/transformers.js/tree/main/examples/react-translator/), [demo](https://huggingface.co/spaces/Xenova/react-translator) |
-| Text to speech (client-side) | In-browser speech synthesis | [code](https://github.com/xenova/transformers.js/tree/main/examples/text-to-speech-client/), [demo](https://huggingface.co/spaces/Xenova/text-to-speech-client) |
-| Browser extension | Text classification extension    | [code](https://github.com/xenova/transformers.js/tree/main/examples/extension/) |
-| Electron          | Text classification application  | [code](https://github.com/xenova/transformers.js/tree/main/examples/electron/)  |
-| Next.js (client-side) | Sentiment analysis (in-browser inference) | [code](https://github.com/xenova/transformers.js/tree/main/examples/next-client/), [demo](https://huggingface.co/spaces/Xenova/next-example-app) |
-| Next.js (server-side) | Sentiment analysis (Node.js inference) | [code](https://github.com/xenova/transformers.js/tree/main/examples/next-server/), [demo](https://huggingface.co/spaces/Xenova/next-server-example-app) |
-| Node.js           | Sentiment analysis API           | [code](https://github.com/xenova/transformers.js/tree/main/examples/node/)      |
-| Demo site         | A collection of demos | [code](https://github.com/xenova/transformers.js/tree/main/examples/demo-site/), [demo](https://xenova.github.io/transformers.js/) |
+| Code Playground   | In-browser code completion website | [code](https://github.com/huggingface/transformers.js/tree/main/examples/code-completion/), [demo](https://huggingface.co/spaces/Xenova/ai-code-playground) |
+| Semantic Image Search (client-side) | Search for images with text | [code](https://github.com/huggingface/transformers.js/tree/main/examples/semantic-image-search-client/), [demo](https://huggingface.co/spaces/Xenova/semantic-image-search-client) |
+| Semantic Image Search (server-side) | Search for images with text (Supabase) | [code](https://github.com/huggingface/transformers.js/tree/main/examples/semantic-image-search/), [demo](https://huggingface.co/spaces/Xenova/semantic-image-search) |
+| Vanilla JavaScript | In-browser object detection     | [video](https://scrimba.com/scrim/cKm9bDAg), [code](https://github.com/huggingface/transformers.js/tree/main/examples/vanilla-js/), [demo](https://huggingface.co/spaces/Scrimba/vanilla-js-object-detector) |
+| React             | Multilingual translation website | [code](https://github.com/huggingface/transformers.js/tree/main/examples/react-translator/), [demo](https://huggingface.co/spaces/Xenova/react-translator) |
+| Text to speech (client-side) | In-browser speech synthesis | [code](https://github.com/huggingface/transformers.js/tree/main/examples/text-to-speech-client/), [demo](https://huggingface.co/spaces/Xenova/text-to-speech-client) |
+| Browser extension | Text classification extension    | [code](https://github.com/huggingface/transformers.js/tree/main/examples/extension/) |
+| Electron          | Text classification application  | [code](https://github.com/huggingface/transformers.js/tree/main/examples/electron/)  |
+| Next.js (client-side) | Sentiment analysis (in-browser inference) | [code](https://github.com/huggingface/transformers.js/tree/main/examples/next-client/), [demo](https://huggingface.co/spaces/Xenova/next-example-app) |
+| Next.js (server-side) | Sentiment analysis (Node.js inference) | [code](https://github.com/huggingface/transformers.js/tree/main/examples/next-server/), [demo](https://huggingface.co/spaces/Xenova/next-server-example-app) |
+| Node.js           | Sentiment analysis API           | [code](https://github.com/huggingface/transformers.js/tree/main/examples/node/)      |
+| Demo site         | A collection of demos | [code](https://github.com/huggingface/transformers.js/tree/main/examples/demo-site/), [demo](https://xenova.github.io/transformers.js/) |
 
 Check out the Transformers.js [template](https://huggingface.co/new-space?template=static-templates%2Ftransformers.js) on Hugging Face to get started in one click!
diff --git a/docs/snippets/4_custom-usage.snippet b/docs/snippets/4_custom-usage.snippet
index 787c8f579..d272c7617 100644
--- a/docs/snippets/4_custom-usage.snippet
+++ b/docs/snippets/4_custom-usage.snippet
@@ -1,12 +1,11 @@
 
 
-By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2/dist/), which should work out-of-the-box. You can customize this as follows:
-
+By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0/dist/), which should work out-of-the-box. You can customize this as follows:
 
 ### Settings
 
 ```javascript
-import { env } from '@xenova/transformers';
+import { env } from '@huggingface/transformers';
 
 // Specify a custom location for models (defaults to '/models/').
 env.localModelPath = '/path/to/models/';
@@ -22,7 +21,7 @@ For a full list of available settings, check out the [API Reference](./api/env).
 
 ### Convert your models to ONNX
 
-We recommend using our [conversion script](https://github.com/xenova/transformers.js/blob/main/scripts/convert.py) to convert your PyTorch, TensorFlow, or JAX models to ONNX in a single command. Behind the scenes, it uses [🤗 Optimum](https://huggingface.co/docs/optimum) to perform conversion and quantization of your model.
+We recommend using our [conversion script](https://github.com/huggingface/transformers.js/blob/main/scripts/convert.py) to convert your PyTorch, TensorFlow, or JAX models to ONNX in a single command. Behind the scenes, it uses [🤗 Optimum](https://huggingface.co/docs/optimum) to perform conversion and quantization of your model.
 
 ```bash
 python -m scripts.convert --quantize --model_id <model_name_or_path>
diff --git a/docs/snippets/6_supported-models.snippet b/docs/snippets/6_supported-models.snippet
index f8ad89ae0..f1bcdad44 100644
--- a/docs/snippets/6_supported-models.snippet
+++ b/docs/snippets/6_supported-models.snippet
@@ -16,6 +16,7 @@
 1. **[CLIPSeg](https://huggingface.co/docs/transformers/model_doc/clipseg)** (from University of Göttingen) released with the paper [Image Segmentation Using Text and Image Prompts](https://arxiv.org/abs/2112.10003) by Timo Lüddecke and Alexander Ecker.
 1. **[CodeGen](https://huggingface.co/docs/transformers/model_doc/codegen)** (from Salesforce) released with the paper [A Conversational Paradigm for Program Synthesis](https://arxiv.org/abs/2203.13474) by Erik Nijkamp, Bo Pang, Hiroaki Hayashi, Lifu Tu, Huan Wang, Yingbo Zhou, Silvio Savarese, Caiming Xiong.
 1. **[CodeLlama](https://huggingface.co/docs/transformers/model_doc/llama_code)** (from MetaAI) released with the paper [Code Llama: Open Foundation Models for Code](https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/) by Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan, Yossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, Artyom Kozhevnikov, Ivan Evtimov, Joanna Bitton, Manish Bhatt, Cristian Canton Ferrer, Aaron Grattafiori, Wenhan Xiong, Alexandre Défossez, Jade Copet, Faisal Azhar, Hugo Touvron, Louis Martin, Nicolas Usunier, Thomas Scialom, Gabriel Synnaeve.
+1. **[Cohere](https://huggingface.co/docs/transformers/main/model_doc/cohere)** (from Cohere) released with the paper [Command-R: Retrieval Augmented Generation at Production Scale](<https://txt.cohere.com/command-r/>) by Cohere.
 1. **[ConvBERT](https://huggingface.co/docs/transformers/model_doc/convbert)** (from YituTech) released with the paper [ConvBERT: Improving BERT with Span-based Dynamic Convolution](https://arxiv.org/abs/2008.02496) by Zihang Jiang, Weihao Yu, Daquan Zhou, Yunpeng Chen, Jiashi Feng, Shuicheng Yan.
 1. **[ConvNeXT](https://huggingface.co/docs/transformers/model_doc/convnext)** (from Facebook AI) released with the paper [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) by Zhuang Liu, Hanzi Mao, Chao-Yuan Wu, Christoph Feichtenhofer, Trevor Darrell, Saining Xie.
 1. **[ConvNeXTV2](https://huggingface.co/docs/transformers/model_doc/convnextv2)** (from Facebook AI) released with the paper [ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders](https://arxiv.org/abs/2301.00808) by Sanghyun Woo, Shoubhik Debnath, Ronghang Hu, Xinlei Chen, Zhuang Liu, In So Kweon, Saining Xie.
@@ -24,6 +25,7 @@
 1. **[Decision Transformer](https://huggingface.co/docs/transformers/model_doc/decision_transformer)** (from Berkeley/Facebook/Google) released with the paper [Decision Transformer: Reinforcement Learning via Sequence Modeling](https://arxiv.org/abs/2106.01345) by Lili Chen, Kevin Lu, Aravind Rajeswaran, Kimin Lee, Aditya Grover, Michael Laskin, Pieter Abbeel, Aravind Srinivas, Igor Mordatch.
 1. **[DeiT](https://huggingface.co/docs/transformers/model_doc/deit)** (from Facebook) released with the paper [Training data-efficient image transformers & distillation through attention](https://arxiv.org/abs/2012.12877) by Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, Hervé Jégou.
 1. **[Depth Anything](https://huggingface.co/docs/transformers/main/model_doc/depth_anything)** (from University of Hong Kong and TikTok) released with the paper [Depth Anything: Unleashing the Power of Large-Scale Unlabeled Data](https://arxiv.org/abs/2401.10891) by Lihe Yang, Bingyi Kang, Zilong Huang, Xiaogang Xu, Jiashi Feng, Hengshuang Zhao.
+1. **Depth Pro** (from Apple) released with the paper [Depth Pro: Sharp Monocular Metric Depth in Less Than a Second](https://arxiv.org/abs/2410.02073) by Aleksei Bochkovskii, Amaël Delaunoy, Hugo Germain, Marcel Santos, Yichao Zhou, Stephan R. Richter, Vladlen Koltun.
 1. **[DETR](https://huggingface.co/docs/transformers/model_doc/detr)** (from Facebook) released with the paper [End-to-End Object Detection with Transformers](https://arxiv.org/abs/2005.12872) by Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov, Sergey Zagoruyko.
 1. **[DINOv2](https://huggingface.co/docs/transformers/model_doc/dinov2)** (from Meta AI) released with the paper [DINOv2: Learning Robust Visual Features without Supervision](https://arxiv.org/abs/2304.07193) by Maxime Oquab, Timothée Darcet, Théo Moutakanni, Huy Vo, Marc Szafraniec, Vasil Khalidov, Pierre Fernandez, Daniel Haziza, Francisco Massa, Alaaeldin El-Nouby, Mahmoud Assran, Nicolas Ballas, Wojciech Galuba, Russell Howes, Po-Yao Huang, Shang-Wen Li, Ishan Misra, Michael Rabbat, Vasu Sharma, Gabriel Synnaeve, Hu Xu, Hervé Jegou, Julien Mairal, Patrick Labatut, Armand Joulin, Piotr Bojanowski.
 1. **[DistilBERT](https://huggingface.co/docs/transformers/model_doc/distilbert)** (from HuggingFace), released together with the paper [DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter](https://arxiv.org/abs/1910.01108) by Victor Sanh, Lysandre Debut and Thomas Wolf. The same method has been applied to compress GPT2 into [DistilGPT2](https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation), RoBERTa into [DistilRoBERTa](https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation), Multilingual BERT into [DistilmBERT](https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation) and a German version of DistilBERT.
@@ -36,39 +38,61 @@
 1. **[Falcon](https://huggingface.co/docs/transformers/model_doc/falcon)** (from Technology Innovation Institute) by Almazrouei, Ebtesam and Alobeidli, Hamza and Alshamsi, Abdulaziz and Cappelli, Alessandro and Cojocaru, Ruxandra and Debbah, Merouane and Goffinet, Etienne and Heslow, Daniel and Launay, Julien and Malartic, Quentin and Noune, Badreddine and Pannier, Baptiste and Penedo, Guilherme.
 1. **FastViT** (from Apple) released with the paper [FastViT: A Fast Hybrid Vision Transformer using Structural Reparameterization](https://arxiv.org/abs/2303.14189) by Pavan Kumar Anasosalu Vasu, James Gabriel, Jeff Zhu, Oncel Tuzel and Anurag Ranjan.
 1. **[FLAN-T5](https://huggingface.co/docs/transformers/model_doc/flan-t5)** (from Google AI) released in the repository [google-research/t5x](https://github.com/google-research/t5x/blob/main/docs/models.md#flan-t5-checkpoints) by Hyung Won Chung, Le Hou, Shayne Longpre, Barret Zoph, Yi Tay, William Fedus, Eric Li, Xuezhi Wang, Mostafa Dehghani, Siddhartha Brahma, Albert Webson, Shixiang Shane Gu, Zhuyun Dai, Mirac Suzgun, Xinyun Chen, Aakanksha Chowdhery, Sharan Narang, Gaurav Mishra, Adams Yu, Vincent Zhao, Yanping Huang, Andrew Dai, Hongkun Yu, Slav Petrov, Ed H. Chi, Jeff Dean, Jacob Devlin, Adam Roberts, Denny Zhou, Quoc V. Le, and Jason Wei
+1. **Florence2** (from Microsoft) released with the paper [Florence-2: Advancing a Unified Representation for a Variety of Vision Tasks](https://arxiv.org/abs/2311.06242) by Bin Xiao, Haiping Wu, Weijian Xu, Xiyang Dai, Houdong Hu, Yumao Lu, Michael Zeng, Ce Liu, Lu Yuan.
+1. **[Gemma](https://huggingface.co/docs/transformers/main/model_doc/gemma)** (from Google) released with the paper [Gemma: Open Models Based on Gemini Technology and Research](https://blog.google/technology/developers/gemma-open-models/) by the Gemma Google team.
+1. **[Gemma2](https://huggingface.co/docs/transformers/main/model_doc/gemma2)** (from Google) released with the paper [Gemma2: Open Models Based on Gemini Technology and Research](https://blog.google/technology/developers/google-gemma-2/) by the Gemma Google team.
 1. **[GLPN](https://huggingface.co/docs/transformers/model_doc/glpn)** (from KAIST) released with the paper [Global-Local Path Networks for Monocular Depth Estimation with Vertical CutDepth](https://arxiv.org/abs/2201.07436) by Doyeon Kim, Woonghyun Ga, Pyungwhan Ahn, Donggyu Joo, Sehwan Chun, Junmo Kim.
 1. **[GPT Neo](https://huggingface.co/docs/transformers/model_doc/gpt_neo)** (from EleutherAI) released in the repository [EleutherAI/gpt-neo](https://github.com/EleutherAI/gpt-neo) by Sid Black, Stella Biderman, Leo Gao, Phil Wang and Connor Leahy.
 1. **[GPT NeoX](https://huggingface.co/docs/transformers/model_doc/gpt_neox)** (from EleutherAI) released with the paper [GPT-NeoX-20B: An Open-Source Autoregressive Language Model](https://arxiv.org/abs/2204.06745) by Sid Black, Stella Biderman, Eric Hallahan, Quentin Anthony, Leo Gao, Laurence Golding, Horace He, Connor Leahy, Kyle McDonell, Jason Phang, Michael Pieler, USVSN Sai Prashanth, Shivanshu Purohit, Laria Reynolds, Jonathan Tow, Ben Wang, Samuel Weinbach
 1. **[GPT-2](https://huggingface.co/docs/transformers/model_doc/gpt2)** (from OpenAI) released with the paper [Language Models are Unsupervised Multitask Learners](https://blog.openai.com/better-language-models/) by Alec Radford*, Jeffrey Wu*, Rewon Child, David Luan, Dario Amodei** and Ilya Sutskever**.
 1. **[GPT-J](https://huggingface.co/docs/transformers/model_doc/gptj)** (from EleutherAI) released in the repository [kingoflolz/mesh-transformer-jax](https://github.com/kingoflolz/mesh-transformer-jax/) by Ben Wang and Aran Komatsuzaki.
 1. **[GPTBigCode](https://huggingface.co/docs/transformers/model_doc/gpt_bigcode)** (from BigCode) released with the paper [SantaCoder: don't reach for the stars!](https://arxiv.org/abs/2301.03988) by Loubna Ben Allal, Raymond Li, Denis Kocetkov, Chenghao Mou, Christopher Akiki, Carlos Munoz Ferrandis, Niklas Muennighoff, Mayank Mishra, Alex Gu, Manan Dey, Logesh Kumar Umapathi, Carolyn Jane Anderson, Yangtian Zi, Joel Lamy Poirier, Hailey Schoelkopf, Sergey Troshin, Dmitry Abulkhanov, Manuel Romero, Michael Lappert, Francesco De Toni, Bernardo García del Río, Qian Liu, Shamik Bose, Urvashi Bhattacharyya, Terry Yue Zhuo, Ian Yu, Paulo Villegas, Marco Zocca, Sourab Mangrulkar, David Lansky, Huu Nguyen, Danish Contractor, Luis Villa, Jia Li, Dzmitry Bahdanau, Yacine Jernite, Sean Hughes, Daniel Fried, Arjun Guha, Harm de Vries, Leandro von Werra.
+1. **[Granite](https://huggingface.co/docs/transformers/main/model_doc/granite)** (from IBM) released with the paper [Power Scheduler: A Batch Size and Token Number Agnostic Learning Rate Scheduler](https://arxiv.org/abs/2408.13359) by Yikang Shen, Matthew Stallone, Mayank Mishra, Gaoyuan Zhang, Shawn Tan, Aditya Prasad, Adriana Meza Soria, David D. Cox, Rameswar Panda.
+1. **[GroupViT](https://huggingface.co/docs/transformers/model_doc/groupvit)** (from UCSD, NVIDIA) released with the paper [GroupViT: Semantic Segmentation Emerges from Text Supervision](https://arxiv.org/abs/2202.11094) by Jiarui Xu, Shalini De Mello, Sifei Liu, Wonmin Byeon, Thomas Breuel, Jan Kautz, Xiaolong Wang.
 1. **[HerBERT](https://huggingface.co/docs/transformers/model_doc/herbert)** (from Allegro.pl, AGH University of Science and Technology) released with the paper [KLEJ: Comprehensive Benchmark for Polish Language Understanding](https://www.aclweb.org/anthology/2020.acl-main.111.pdf) by Piotr Rybak, Robert Mroczkowski, Janusz Tracz, Ireneusz Gawlik.
+1. **[Hiera](https://huggingface.co/docs/transformers/model_doc/hiera)** (from Meta) released with the paper [Hiera: A Hierarchical Vision Transformer without the Bells-and-Whistles](https://arxiv.org/pdf/2306.00989) by Chaitanya Ryali, Yuan-Ting Hu, Daniel Bolya, Chen Wei, Haoqi Fan, Po-Yao Huang, Vaibhav Aggarwal, Arkabandhu Chowdhury, Omid Poursaeed, Judy Hoffman, Jitendra Malik, Yanghao Li, Christoph Feichtenhofer.
 1. **[Hubert](https://huggingface.co/docs/transformers/model_doc/hubert)** (from Facebook) released with the paper [HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units](https://arxiv.org/abs/2106.07447) by Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai, Kushal Lakhotia, Ruslan Salakhutdinov, Abdelrahman Mohamed.
+1. **JAIS** (from Core42) released with the paper [Jais and Jais-chat: Arabic-Centric Foundation and Instruction-Tuned Open Generative Large Language Models](https://arxiv.org/pdf/2308.16149) by Neha Sengupta, Sunil Kumar Sahu, Bokang Jia, Satheesh Katipomu, Haonan Li, Fajri Koto, William Marshall, Gurpreet Gosal, Cynthia Liu, Zhiming Chen, Osama Mohammed Afzal, Samta Kamboj, Onkar Pandit, Rahul Pal, Lalit Pradhan, Zain Muhammad Mujahid, Massa Baali, Xudong Han, Sondos Mahmoud Bsharat, Alham Fikri Aji, Zhiqiang Shen, Zhengzhong Liu, Natalia Vassilieva, Joel Hestness, Andy Hock, Andrew Feldman, Jonathan Lee, Andrew Jackson, Hector Xuguang Ren, Preslav Nakov, Timothy Baldwin, Eric Xing.
 1. **[LongT5](https://huggingface.co/docs/transformers/model_doc/longt5)** (from Google AI) released with the paper [LongT5: Efficient Text-To-Text Transformer for Long Sequences](https://arxiv.org/abs/2112.07916) by Mandy Guo, Joshua Ainslie, David Uthus, Santiago Ontanon, Jianmo Ni, Yun-Hsuan Sung, Yinfei Yang.
 1. **[LLaMA](https://huggingface.co/docs/transformers/model_doc/llama)** (from The FAIR team of Meta AI) released with the paper [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) by Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timothée Lacroix, Baptiste Rozière, Naman Goyal, Eric Hambro, Faisal Azhar, Aurelien Rodriguez, Armand Joulin, Edouard Grave, Guillaume Lample.
 1. **[Llama2](https://huggingface.co/docs/transformers/model_doc/llama2)** (from The FAIR team of Meta AI) released with the paper [Llama2: Open Foundation and Fine-Tuned Chat Models](https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/XXX) by Hugo Touvron, Louis Martin, Kevin Stone, Peter Albert, Amjad Almahairi, Yasmine Babaei, Nikolay Bashlykov, Soumya Batra, Prajjwal Bhargava, Shruti Bhosale, Dan Bikel, Lukas Blecher, Cristian Canton Ferrer, Moya Chen, Guillem Cucurull, David Esiobu, Jude Fernandes, Jeremy Fu, Wenyin Fu, Brian Fuller, Cynthia Gao, Vedanuj Goswami, Naman Goyal, Anthony Hartshorn, Saghar Hosseini, Rui Hou, Hakan Inan, Marcin Kardas, Viktor Kerkez Madian Khabsa, Isabel Kloumann, Artem Korenev, Punit Singh Koura, Marie-Anne Lachaux, Thibaut Lavril, Jenya Lee, Diana Liskovich, Yinghai Lu, Yuning Mao, Xavier Martinet, Todor Mihaylov, Pushka rMishra, Igor Molybog, Yixin Nie, Andrew Poulton, Jeremy Reizenstein, Rashi Rungta, Kalyan Saladi, Alan Schelten, Ruan Silva, Eric Michael Smith, Ranjan Subramanian, Xiaoqing EllenTan, Binh Tang, Ross Taylor, Adina Williams, Jian Xiang Kuan, Puxin Xu, Zheng Yan, Iliyan Zarov, Yuchen Zhang, Angela Fan, Melanie Kambadur, Sharan Narang, Aurelien Rodriguez, Robert Stojnic, Sergey Edunov, Thomas Scialom.
+1. **[LLaVa](https://huggingface.co/docs/transformers/model_doc/llava)** (from Microsoft Research & University of Wisconsin-Madison) released with the paper [Visual Instruction Tuning](https://arxiv.org/abs/2304.08485) by Haotian Liu, Chunyuan Li, Yuheng Li and Yong Jae Lee.
 1. **[M2M100](https://huggingface.co/docs/transformers/model_doc/m2m_100)** (from Facebook) released with the paper [Beyond English-Centric Multilingual Machine Translation](https://arxiv.org/abs/2010.11125) by Angela Fan, Shruti Bhosale, Holger Schwenk, Zhiyi Ma, Ahmed El-Kishky, Siddharth Goyal, Mandeep Baines, Onur Celebi, Guillaume Wenzek, Vishrav Chaudhary, Naman Goyal, Tom Birch, Vitaliy Liptchinsky, Sergey Edunov, Edouard Grave, Michael Auli, Armand Joulin.
 1. **[MarianMT](https://huggingface.co/docs/transformers/model_doc/marian)** Machine translation models trained using [OPUS](http://opus.nlpl.eu/) data by Jörg Tiedemann. The [Marian Framework](https://marian-nmt.github.io/) is being developed by the Microsoft Translator Team.
+1. **[MaskFormer](https://huggingface.co/docs/transformers/model_doc/maskformer)** (from Meta and UIUC) released with the paper [Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278) by Bowen Cheng, Alexander G. Schwing, Alexander Kirillov.
 1. **[mBART](https://huggingface.co/docs/transformers/model_doc/mbart)** (from Facebook) released with the paper [Multilingual Denoising Pre-training for Neural Machine Translation](https://arxiv.org/abs/2001.08210) by Yinhan Liu, Jiatao Gu, Naman Goyal, Xian Li, Sergey Edunov, Marjan Ghazvininejad, Mike Lewis, Luke Zettlemoyer.
 1. **[mBART-50](https://huggingface.co/docs/transformers/model_doc/mbart)** (from Facebook) released with the paper [Multilingual Translation with Extensible Multilingual Pretraining and Finetuning](https://arxiv.org/abs/2008.00401) by Yuqing Tang, Chau Tran, Xian Li, Peng-Jen Chen, Naman Goyal, Vishrav Chaudhary, Jiatao Gu, Angela Fan.
+1. **[MusicGen](https://huggingface.co/docs/transformers/model_doc/musicgen)** (from Meta) released with the paper [Simple and Controllable Music Generation](https://arxiv.org/abs/2306.05284) by Jade Copet, Felix Kreuk, Itai Gat, Tal Remez, David Kant, Gabriel Synnaeve, Yossi Adi and Alexandre Défossez.
 1. **[Mistral](https://huggingface.co/docs/transformers/model_doc/mistral)** (from Mistral AI) by The [Mistral AI](https://mistral.ai) team: Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lélio Renard Lavaud, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.
 1. **[MMS](https://huggingface.co/docs/transformers/model_doc/mms)** (from Facebook) released with the paper [Scaling Speech Technology to 1,000+ Languages](https://arxiv.org/abs/2305.13516) by Vineel Pratap, Andros Tjandra, Bowen Shi, Paden Tomasello, Arun Babu, Sayani Kundu, Ali Elkahky, Zhaoheng Ni, Apoorv Vyas, Maryam Fazel-Zarandi, Alexei Baevski, Yossi Adi, Xiaohui Zhang, Wei-Ning Hsu, Alexis Conneau, Michael Auli.
 1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
+1. **MobileCLIP** (from Apple) released with the paper [MobileCLIP: Fast Image-Text Models through Multi-Modal Reinforced Training](https://arxiv.org/abs/2311.17049) by Pavan Kumar Anasosalu Vasu, Hadi Pouransari, Fartash Faghri, Raviteja Vemulapalli, Oncel Tuzel.
+1. **[MobileNetV1](https://huggingface.co/docs/transformers/model_doc/mobilenet_v1)** (from Google Inc.) released with the paper [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861) by Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam.
+1. **[MobileNetV2](https://huggingface.co/docs/transformers/model_doc/mobilenet_v2)** (from Google Inc.) released with the paper [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) by Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen.
+1. **MobileNetV3** (from Google Inc.) released with the paper [Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) by Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam.
+1. **MobileNetV4** (from Google Inc.) released with the paper [MobileNetV4 - Universal Models for the Mobile Ecosystem](https://arxiv.org/abs/2404.10518) by Danfeng Qin, Chas Leichner, Manolis Delakis, Marco Fornoni, Shixin Luo, Fan Yang, Weijun Wang, Colby Banbury, Chengxi Ye, Berkin Akin, Vaibhav Aggarwal, Tenghui Zhu, Daniele Moro, Andrew Howard.
 1. **[MobileViT](https://huggingface.co/docs/transformers/model_doc/mobilevit)** (from Apple) released with the paper [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari.
 1. **[MobileViTV2](https://huggingface.co/docs/transformers/model_doc/mobilevitv2)** (from Apple) released with the paper [Separable Self-attention for Mobile Vision Transformers](https://arxiv.org/abs/2206.02680) by Sachin Mehta and Mohammad Rastegari.
+1. **Moondream1** released in the repository [moondream](https://github.com/vikhyat/moondream) by vikhyat.
 1. **[MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://arxiv.org/abs/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu.
 1. **[MPT](https://huggingface.co/docs/transformers/model_doc/mpt)** (from MosaiML) released with the repository [llm-foundry](https://github.com/mosaicml/llm-foundry/) by the MosaicML NLP Team.
 1. **[MT5](https://huggingface.co/docs/transformers/model_doc/mt5)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
 1. **[NLLB](https://huggingface.co/docs/transformers/model_doc/nllb)** (from Meta) released with the paper [No Language Left Behind: Scaling Human-Centered Machine Translation](https://arxiv.org/abs/2207.04672) by the NLLB team.
 1. **[Nougat](https://huggingface.co/docs/transformers/model_doc/nougat)** (from Meta AI) released with the paper [Nougat: Neural Optical Understanding for Academic Documents](https://arxiv.org/abs/2308.13418) by Lukas Blecher, Guillem Cucurull, Thomas Scialom, Robert Stojnic.
+1. **OpenELM** (from Apple) released with the paper [OpenELM: An Efficient Language Model Family with Open-source Training and Inference Framework](https://arxiv.org/abs/2404.14619) by Sachin Mehta, Mohammad Hossein Sekhavat, Qingqing Cao, Maxwell Horton, Yanzi Jin, Chenfan Sun, Iman Mirzadeh, Mahyar Najibi, Dmitry Belenko, Peter Zatloukal, Mohammad Rastegari.
 1. **[OPT](https://huggingface.co/docs/transformers/master/model_doc/opt)** (from Meta AI) released with the paper [OPT: Open Pre-trained Transformer Language Models](https://arxiv.org/abs/2205.01068) by Susan Zhang, Stephen Roller, Naman Goyal, Mikel Artetxe, Moya Chen, Shuohui Chen et al.
 1. **[OWL-ViT](https://huggingface.co/docs/transformers/model_doc/owlvit)** (from Google AI) released with the paper [Simple Open-Vocabulary Object Detection with Vision Transformers](https://arxiv.org/abs/2205.06230) by Matthias Minderer, Alexey Gritsenko, Austin Stone, Maxim Neumann, Dirk Weissenborn, Alexey Dosovitskiy, Aravindh Mahendran, Anurag Arnab, Mostafa Dehghani, Zhuoran Shen, Xiao Wang, Xiaohua Zhai, Thomas Kipf, and Neil Houlsby.
 1. **[OWLv2](https://huggingface.co/docs/transformers/model_doc/owlv2)** (from Google AI) released with the paper [Scaling Open-Vocabulary Object Detection](https://arxiv.org/abs/2306.09683) by Matthias Minderer, Alexey Gritsenko, Neil Houlsby.
 1. **[Phi](https://huggingface.co/docs/transformers/main/model_doc/phi)** (from Microsoft) released with the papers - [Textbooks Are All You Need](https://arxiv.org/abs/2306.11644) by Suriya Gunasekar, Yi Zhang, Jyoti Aneja, Caio César Teodoro Mendes, Allie Del Giorno, Sivakanth Gopi, Mojan Javaheripi, Piero Kauffmann, Gustavo de Rosa, Olli Saarikivi, Adil Salim, Shital Shah, Harkirat Singh Behl, Xin Wang, Sébastien Bubeck, Ronen Eldan, Adam Tauman Kalai, Yin Tat Lee and Yuanzhi Li, [Textbooks Are All You Need II: phi-1.5 technical report](https://arxiv.org/abs/2309.05463) by Yuanzhi Li, Sébastien Bubeck, Ronen Eldan, Allie Del Giorno, Suriya Gunasekar and Yin Tat Lee.
+1. **[Phi3](https://huggingface.co/docs/transformers/main/model_doc/phi3)** (from Microsoft) released with the paper [Phi-3 Technical Report: A Highly Capable Language Model Locally on Your Phone](https://arxiv.org/abs/2404.14219) by Marah Abdin, Sam Ade Jacobs, Ammar Ahmad Awan, Jyoti Aneja, Ahmed Awadallah, Hany Awadalla, Nguyen Bach, Amit Bahree, Arash Bakhtiari, Harkirat Behl, Alon Benhaim, Misha Bilenko, Johan Bjorck, Sébastien Bubeck, Martin Cai, Caio César Teodoro Mendes, Weizhu Chen, Vishrav Chaudhary, Parul Chopra, Allie Del Giorno, Gustavo de Rosa, Matthew Dixon, Ronen Eldan, Dan Iter, Amit Garg, Abhishek Goswami, Suriya Gunasekar, Emman Haider, Junheng Hao, Russell J. Hewett, Jamie Huynh, Mojan Javaheripi, Xin Jin, Piero Kauffmann, Nikos Karampatziakis, Dongwoo Kim, Mahoud Khademi, Lev Kurilenko, James R. Lee, Yin Tat Lee, Yuanzhi Li, Chen Liang, Weishung Liu, Eric Lin, Zeqi Lin, Piyush Madan, Arindam Mitra, Hardik Modi, Anh Nguyen, Brandon Norick, Barun Patra, Daniel Perez-Becker, Thomas Portet, Reid Pryzant, Heyang Qin, Marko Radmilac, Corby Rosset, Sambudha Roy, Olatunji Ruwase, Olli Saarikivi, Amin Saied, Adil Salim, Michael Santacroce, Shital Shah, Ning Shang, Hiteshi Sharma, Xia Song, Masahiro Tanaka, Xin Wang, Rachel Ward, Guanhua Wang, Philipp Witte, Michael Wyatt, Can Xu, Jiahang Xu, Sonali Yadav, Fan Yang, Ziyi Yang, Donghan Yu, Chengruidong Zhang, Cyril Zhang, Jianwen Zhang, Li Lyna Zhang, Yi Zhang, Yue Zhang, Yunan Zhang, Xiren Zhou.
+1. **[PVT](https://huggingface.co/docs/transformers/main/model_doc/pvt)** (from Nanjing University, The University of Hong Kong etc.) released with the paper [Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions](https://arxiv.org/pdf/2102.12122.pdf) by Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao.
+1. **PyAnnote** released in the repository [pyannote/pyannote-audio](https://github.com/pyannote/pyannote-audio) by Hervé Bredin.
 1. **[Qwen2](https://huggingface.co/docs/transformers/model_doc/qwen2)** (from the Qwen team, Alibaba Group) released with the paper [Qwen Technical Report](https://arxiv.org/abs/2309.16609) by Jinze Bai, Shuai Bai, Yunfei Chu, Zeyu Cui, Kai Dang, Xiaodong Deng, Yang Fan, Wenbin Ge, Yu Han, Fei Huang, Binyuan Hui, Luo Ji, Mei Li, Junyang Lin, Runji Lin, Dayiheng Liu, Gao Liu, Chengqiang Lu, Keming Lu, Jianxin Ma, Rui Men, Xingzhang Ren, Xuancheng Ren, Chuanqi Tan, Sinan Tan, Jianhong Tu, Peng Wang, Shijie Wang, Wei Wang, Shengguang Wu, Benfeng Xu, Jin Xu, An Yang, Hao Yang, Jian Yang, Shusheng Yang, Yang Yao, Bowen Yu, Hongyi Yuan, Zheng Yuan, Jianwei Zhang, Xingxuan Zhang, Yichang Zhang, Zhenru Zhang, Chang Zhou, Jingren Zhou, Xiaohuan Zhou and Tianhang Zhu.
 1. **[ResNet](https://huggingface.co/docs/transformers/model_doc/resnet)** (from Microsoft Research) released with the paper [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) by Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun.
 1. **[RoBERTa](https://huggingface.co/docs/transformers/model_doc/roberta)** (from Facebook), released together with the paper [RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://arxiv.org/abs/1907.11692) by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov.
 1. **[RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer)** (from ZhuiyiTechnology), released together with the paper [RoFormer: Enhanced Transformer with Rotary Position Embedding](https://arxiv.org/abs/2104.09864) by Jianlin Su and Yu Lu and Shengfeng Pan and Bo Wen and Yunfeng Liu.
+1. **[RT-DETR](https://huggingface.co/docs/transformers/model_doc/rt_detr)** (from Baidu), released together with the paper [DETRs Beat YOLOs on Real-time Object Detection](https://arxiv.org/abs/2304.08069) by Yian Zhao, Wenyu Lv, Shangliang Xu, Jinman Wei, Guanzhong Wang, Qingqing Dang, Yi Liu, Jie Chen.
+1. **Sapiens** (from Meta AI) released with the paper [Sapiens: Foundation for Human Vision Models](https://arxiv.org/pdf/2408.12569) by Rawal Khirodkar, Timur Bagautdinov, Julieta Martinez, Su Zhaoen, Austin James, Peter Selednik, Stuart Anderson, Shunsuke Saito.
 1. **[SegFormer](https://huggingface.co/docs/transformers/model_doc/segformer)** (from NVIDIA) released with the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) by Enze Xie, Wenhai Wang, Zhiding Yu, Anima Anandkumar, Jose M. Alvarez, Ping Luo.
 1. **[Segment Anything](https://huggingface.co/docs/transformers/model_doc/sam)** (from Meta AI) released with the paper [Segment Anything](https://arxiv.org/pdf/2304.02643v1.pdf) by Alexander Kirillov, Eric Mintun, Nikhila Ravi, Hanzi Mao, Chloe Rolland, Laura Gustafson, Tete Xiao, Spencer Whitehead, Alex Berg, Wan-Yen Lo, Piotr Dollar, Ross Girshick.
 1. **[SigLIP](https://huggingface.co/docs/transformers/main/model_doc/siglip)** (from Google AI) released with the paper [Sigmoid Loss for Language Image Pre-Training](https://arxiv.org/abs/2303.15343) by Xiaohua Zhai, Basil Mustafa, Alexander Kolesnikov, Lucas Beyer.
@@ -85,7 +109,9 @@
 1. **[UniSpeech](https://huggingface.co/docs/transformers/model_doc/unispeech)** (from Microsoft Research) released with the paper [UniSpeech: Unified Speech Representation Learning with Labeled and Unlabeled Data](https://arxiv.org/abs/2101.07597) by Chengyi Wang, Yu Wu, Yao Qian, Kenichi Kumatani, Shujie Liu, Furu Wei, Michael Zeng, Xuedong Huang.
 1. **[UniSpeechSat](https://huggingface.co/docs/transformers/model_doc/unispeech-sat)** (from Microsoft Research) released with the paper [UNISPEECH-SAT: UNIVERSAL SPEECH REPRESENTATION LEARNING WITH SPEAKER AWARE PRE-TRAINING](https://arxiv.org/abs/2110.05752) by Sanyuan Chen, Yu Wu, Chengyi Wang, Zhengyang Chen, Zhuo Chen, Shujie Liu, Jian Wu, Yao Qian, Furu Wei, Jinyu Li, Xiangzhan Yu.
 1. **[Vision Transformer (ViT)](https://huggingface.co/docs/transformers/model_doc/vit)** (from Google AI) released with the paper [An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/abs/2010.11929) by Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby.
+1. **[ViTMAE](https://huggingface.co/docs/transformers/model_doc/vit_mae)** (from Meta AI) released with the paper [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) by Kaiming He, Xinlei Chen, Saining Xie, Yanghao Li, Piotr Dollár, Ross Girshick.
 1. **[ViTMatte](https://huggingface.co/docs/transformers/model_doc/vitmatte)** (from HUST-VL) released with the paper [ViTMatte: Boosting Image Matting with Pretrained Plain Vision Transformers](https://arxiv.org/abs/2305.15272) by Jingfeng Yao, Xinggang Wang, Shusheng Yang, Baoyuan Wang.
+1. **[ViTMSN](https://huggingface.co/docs/transformers/model_doc/vit_msn)** (from Meta AI) released with the paper [Masked Siamese Networks for Label-Efficient Learning](https://arxiv.org/abs/2204.07141) by Mahmoud Assran, Mathilde Caron, Ishan Misra, Piotr Bojanowski, Florian Bordes, Pascal Vincent, Armand Joulin, Michael Rabbat, Nicolas Ballas.
 1. **[VITS](https://huggingface.co/docs/transformers/model_doc/vits)** (from Kakao Enterprise) released with the paper [Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech](https://arxiv.org/abs/2106.06103) by Jaehyeon Kim, Jungil Kong, Juhee Son.
 1. **[Wav2Vec2](https://huggingface.co/docs/transformers/model_doc/wav2vec2)** (from Facebook AI) released with the paper [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477) by Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli.
 1. **[Wav2Vec2-BERT](https://huggingface.co/docs/transformers/main/model_doc/wav2vec2-bert)** (from Meta AI) released with the paper [Seamless: Multilingual Expressive and Streaming Speech Translation](https://ai.meta.com/research/publications/seamless-multilingual-expressive-and-streaming-speech-translation/) by the Seamless Communication team.
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 1fe9150f6..4458c049b 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -48,6 +48,21 @@
       title: ONNX
     title: Backends
     isExpanded: false
+  - sections:
+    - local: api/generation/parameters
+      title: Parameters
+    - local: api/generation/configuration_utils
+      title: Configuration
+    - local: api/generation/logits_process
+      title: Logits Processors
+    - local: api/generation/logits_sampler
+      title: Logits Samplers
+    - local: api/generation/stopping_criteria
+      title: Stopping Criteria
+    - local: api/generation/streamers
+      title: Streamers
+    title: Generation
+    isExpanded: false
   - sections:
     - local: api/utils/core
       title: Core
@@ -61,8 +76,6 @@
       title: Tensor
     - local: api/utils/maths
       title: Maths
-    - local: api/utils/generation
-      title: Generation
     - local: api/utils/data-structures
       title: Data Structures
     title: Utilities
diff --git a/docs/source/guides/node-audio-processing.md b/docs/source/guides/node-audio-processing.md
index 88d93df2d..1b9e3cfea 100644
--- a/docs/source/guides/node-audio-processing.md
+++ b/docs/source/guides/node-audio-processing.md
@@ -13,7 +13,7 @@ This tutorial will be written as an ES module, but you can easily adapt it to us
 
 
 **Useful links:**
-- [Source code](https://github.com/xenova/transformers.js/tree/main/examples/node-audio-processing)
+- [Source code](https://github.com/huggingface/transformers.js/tree/main/examples/node-audio-processing)
 - [Documentation](https://huggingface.co/docs/transformers.js)
 
 
@@ -26,11 +26,11 @@ This tutorial will be written as an ES module, but you can easily adapt it to us
 
 ## Getting started
 
-Let's start by creating a new Node.js project and installing Transformers.js via [NPM](https://www.npmjs.com/package/@xenova/transformers):
+Let's start by creating a new Node.js project and installing Transformers.js via [NPM](https://www.npmjs.com/package/@huggingface/transformers):
 
 ```bash
 npm init -y
-npm i @xenova/transformers
+npm i @huggingface/transformers
 ```
 
 <Tip>
@@ -52,7 +52,7 @@ npm i wavefile
 Start by creating a new file called `index.js`, which will be the entry point for our application. Let's also import the necessary modules:
 
 ```js
-import { pipeline } from '@xenova/transformers';
+import { pipeline } from '@huggingface/transformers';
 import wavefile from 'wavefile';
 ```
 
diff --git a/docs/source/guides/private.md b/docs/source/guides/private.md
index a687e1789..6715f0d4e 100644
--- a/docs/source/guides/private.md
+++ b/docs/source/guides/private.md
@@ -28,7 +28,7 @@ Transformers.js will attach an Authorization header to requests made to the Hugg
 One way to do this is to call your program with the environment variable set. For example, let's say you have a file called `llama.js` with the following code:
 
 ```js
-import { AutoTokenizer } from '@xenova/transformers';
+import { AutoTokenizer } from '@huggingface/transformers';
 
 // Load tokenizer for a gated repository.
 const tokenizer = await AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf');
diff --git a/docs/source/index.md b/docs/source/index.md
index 1b94c115f..6551e303f 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -35,7 +35,7 @@ The documentation is organized into 4 sections:
 
 Here is the list of all tasks and architectures currently supported by Transformers.js.
 If you don't see your task/model listed here or it is not yet supported, feel free
-to open up a feature request [here](https://github.com/xenova/transformers.js/issues/new/choose).
+to open up a feature request [here](https://github.com/huggingface/transformers.js/issues/new/choose).
 
 To find compatible models on the Hub, select the "transformers.js" library tag in the filter menu (or visit [this link](https://huggingface.co/models?library=transformers.js)).
 You can refine your search by selecting the task you're interested in (e.g., [text-classification](https://huggingface.co/models?pipeline_tag=text-classification&library=transformers.js)).
diff --git a/docs/source/pipelines.md b/docs/source/pipelines.md
index 93f4ee216..0c1b3d584 100644
--- a/docs/source/pipelines.md
+++ b/docs/source/pipelines.md
@@ -14,7 +14,7 @@ For the full list of available tasks/pipelines, check out [this table](#availabl
 Start by creating an instance of `pipeline()` and specifying a task you want to use it for. For example, to create a sentiment analysis pipeline, you can do:
 
 ```javascript
-import { pipeline } from '@xenova/transformers';
+import { pipeline } from '@huggingface/transformers';
 
 let classifier = await pipeline('sentiment-analysis');
 ```
diff --git a/docs/source/tutorials/browser-extension.md b/docs/source/tutorials/browser-extension.md
index a5fd391bd..a8853c5d4 100644
--- a/docs/source/tutorials/browser-extension.md
+++ b/docs/source/tutorials/browser-extension.md
@@ -1,4 +1,4 @@
 # Building a browser extension
 
-*Full tutorial coming soon...* In the meantime, check out the example application: https://github.com/xenova/transformers.js/tree/main/examples/extension
+*Full tutorial coming soon...* In the meantime, check out the example application: https://github.com/huggingface/transformers.js/tree/main/examples/extension
 
diff --git a/docs/source/tutorials/electron.md b/docs/source/tutorials/electron.md
index 6962e4b7e..5fb3650c1 100644
--- a/docs/source/tutorials/electron.md
+++ b/docs/source/tutorials/electron.md
@@ -1,3 +1,3 @@
 # Building an Electron application
 
-*Full tutorial coming soon...* In the meantime, check out the example application: https://github.com/xenova/transformers.js/tree/main/examples/electron
+*Full tutorial coming soon...* In the meantime, check out the example application: https://github.com/huggingface/transformers.js/tree/main/examples/electron
diff --git a/docs/source/tutorials/next.md b/docs/source/tutorials/next.md
index b3bcff659..0c8c70279 100644
--- a/docs/source/tutorials/next.md
+++ b/docs/source/tutorials/next.md
@@ -9,7 +9,7 @@ The final product will look something like this:
 
 Useful links:
 - Demo site: [client-side](https://huggingface.co/spaces/Xenova/next-example-app) or [server-side](https://huggingface.co/spaces/Xenova/next-server-example-app)
-- Source code: [client-side](https://github.com/xenova/transformers.js/tree/main/examples/next-client) or [server-side](https://github.com/xenova/transformers.js/tree/main/examples/next-server)
+- Source code: [client-side](https://github.com/huggingface/transformers.js/tree/main/examples/next-client) or [server-side](https://github.com/huggingface/transformers.js/tree/main/examples/next-server)
 
 ## Prerequisites
 
@@ -42,11 +42,11 @@ On installation, you'll see various prompts. For this demo, we'll be selecting t
 
 ### Step 2: Install and configure Transformers.js
 
-You can install Transformers.js from [NPM](https://www.npmjs.com/package/@xenova/transformers) with the following command:
+You can install Transformers.js from [NPM](https://www.npmjs.com/package/@huggingface/transformers) with the following command:
 
 
 ```bash
-npm i @xenova/transformers
+npm i @huggingface/transformers
 ```
 
 We also need to update the `next.config.js` file to ignore node-specific modules when bundling for the browser:
@@ -76,7 +76,7 @@ module.exports = nextConfig
 Next, we'll create a new [Web Worker](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers) script where we'll place all ML-related code. This is to ensure that the main thread is not blocked while the model is loading and performing inference. For this application, we'll be using [`Xenova/distilbert-base-uncased-finetuned-sst-2-english`](https://huggingface.co/Xenova/distilbert-base-uncased-finetuned-sst-2-english), a ~67M parameter model finetuned on the [Stanford Sentiment Treebank](https://huggingface.co/datasets/sst) dataset. Add the following code to `./src/app/worker.js`:
 
 ```js
-import { pipeline, env } from "@xenova/transformers";
+import { pipeline, env } from "@huggingface/transformers";
 
 // Skip local model check
 env.allowLocalModels = false;
@@ -264,11 +264,11 @@ On installation, you'll see various prompts. For this demo, we'll be selecting t
 
 ### Step 2: Install and configure Transformers.js
 
-You can install Transformers.js from [NPM](https://www.npmjs.com/package/@xenova/transformers) with the following command:
+You can install Transformers.js from [NPM](https://www.npmjs.com/package/@huggingface/transformers) with the following command:
 
 
 ```bash
-npm i @xenova/transformers
+npm i @huggingface/transformers
 ```
 
 We also need to update the `next.config.js` file to prevent Webpack from bundling certain packages:
@@ -294,7 +294,7 @@ Next, let's set up our Route Handler. We can do this by creating two files in a
 1. `pipeline.js` - to handle the construction of our pipeline.
 
     ```js
-    import { pipeline } from "@xenova/transformers";
+    import { pipeline } from "@huggingface/transformers";
 
     // Use the Singleton pattern to enable lazy construction of the pipeline.
     // NOTE: We wrap the class in a function to prevent code duplication (see below).
@@ -413,7 +413,7 @@ Visit the URL shown in the terminal (e.g., [http://localhost:3000/](http://local
 
 For this demo, we will build and deploy our application to [Hugging Face Spaces](https://huggingface.co/docs/hub/spaces). If you haven't already, you can create a free Hugging Face account [here](https://huggingface.co/join).
 
-1. Create a new `Dockerfile` in your project's root folder. You can use our [example Dockerfile](https://github.com/xenova/transformers.js/blob/main/examples/next-server/Dockerfile) as a template.
+1. Create a new `Dockerfile` in your project's root folder. You can use our [example Dockerfile](https://github.com/huggingface/transformers.js/blob/main/examples/next-server/Dockerfile) as a template.
 2. Visit [https://huggingface.co/new-space](https://huggingface.co/new-space) and fill in the form. Remember to select "Docker" as the space type (you can choose the "Blank" Docker template).
 3. Click the "Create space" button at the bottom of the page.
 4. Go to "Files" &rarr; "Add file" &rarr; "Upload files". Drag the files from your project folder (excluding `node_modules` and `.next`, if present) into the upload box and click "Upload". After they have uploaded, scroll down to the button and click "Commit changes to main".
diff --git a/docs/source/tutorials/node.md b/docs/source/tutorials/node.md
index 2d9e3adf4..7cc5cc6be 100644
--- a/docs/source/tutorials/node.md
+++ b/docs/source/tutorials/node.md
@@ -19,7 +19,7 @@ Although you can always use the [Python library](https://github.com/huggingface/
 </Tip>
 
 **Useful links:**
-- Source code ([ESM](https://github.com/xenova/transformers.js/tree/main/examples/node/esm/app.js) or [CommonJS](https://github.com/xenova/transformers.js/tree/main/examples/node/commonjs/app.js))
+- Source code ([ESM](https://github.com/huggingface/transformers.js/tree/main/examples/node/esm/app.js) or [CommonJS](https://github.com/huggingface/transformers.js/tree/main/examples/node/commonjs/app.js))
 - [Documentation](https://huggingface.co/docs/transformers.js) 
 
 
@@ -31,11 +31,11 @@ Although you can always use the [Python library](https://github.com/huggingface/
 
 ## Getting started
 
-Let's start by creating a new Node.js project and installing Transformers.js via [NPM](https://www.npmjs.com/package/@xenova/transformers):
+Let's start by creating a new Node.js project and installing Transformers.js via [NPM](https://www.npmjs.com/package/@huggingface/transformers):
 
 ```bash
 npm init -y
-npm i @xenova/transformers
+npm i @huggingface/transformers
 ```
 
 Next, create a new file called `app.js`, which will be the entry point for our application. Depending on whether you're using [ECMAScript modules](#ecmascript-modules-esm) or [CommonJS](#commonjs), you will need to do some things differently (see below).
@@ -66,7 +66,7 @@ import url from 'url';
 Following that, let's import Transformers.js and define the `MyClassificationPipeline` class.
 
 ```javascript
-import { pipeline, env } from '@xenova/transformers';
+import { pipeline, env } from '@huggingface/transformers';
 
 class MyClassificationPipeline {
   static task = 'text-classification';
@@ -107,7 +107,7 @@ class MyClassificationPipeline {
   static async getInstance(progress_callback = null) {
     if (this.instance === null) {
       // Dynamically import the Transformers.js library
-      let { pipeline, env } = await import('@xenova/transformers');
+      let { pipeline, env } = await import('@huggingface/transformers');
 
       // NOTE: Uncomment this to change the cache directory
       // env.cacheDir = './.cache';
@@ -195,7 +195,7 @@ Great! We've successfully created a basic HTTP server that uses Transformers.js
 
 ### Model caching
 
-By default, the first time you run the application, it will download the model files and cache them on your file system (in `./node_modules/@xenova/transformers/.cache/`). All subsequent requests will then use this model. You can change the location of the cache by setting `env.cacheDir`. For example, to cache the model in the `.cache` directory in the current working directory, you can add:
+By default, the first time you run the application, it will download the model files and cache them on your file system (in `./node_modules/@huggingface/transformers/.cache/`). All subsequent requests will then use this model. You can change the location of the cache by setting `env.cacheDir`. For example, to cache the model in the `.cache` directory in the current working directory, you can add:
 
 ```javascript
 env.cacheDir = './.cache';
diff --git a/docs/source/tutorials/react.md b/docs/source/tutorials/react.md
index ab50d4de9..e617d8a05 100644
--- a/docs/source/tutorials/react.md
+++ b/docs/source/tutorials/react.md
@@ -7,7 +7,7 @@ In this tutorial, we'll be building a simple React application that performs mul
 
 Useful links:
 - [Demo site](https://huggingface.co/spaces/Xenova/react-translator)
-- [Source code](https://github.com/xenova/transformers.js/tree/main/examples/react-translator)
+- [Source code](https://github.com/huggingface/transformers.js/tree/main/examples/react-translator)
 
 
 ## Prerequisites
@@ -44,10 +44,10 @@ You can stop the development server by pressing <kbd>Ctrl</kbd> + <kbd>C</kbd> i
 
 ## Step 2: Install and configure Transformers.js
 
-Now we get to the fun part: adding machine learning to our application! First, install Transformers.js from [NPM](https://www.npmjs.com/package/@xenova/transformers) with the following command:
+Now we get to the fun part: adding machine learning to our application! First, install Transformers.js from [NPM](https://www.npmjs.com/package/@huggingface/transformers) with the following command:
 
 ```bash
-npm install @xenova/transformers
+npm install @huggingface/transformers
 ```
 
 For this application, we will use the [Xenova/nllb-200-distilled-600M](https://huggingface.co/Xenova/nllb-200-distilled-600M) model, which can perform multilingual translation among 200 languages. Before we start, there are 2 things we need to take note of:
@@ -58,7 +58,7 @@ We can achieve both of these goals by using a [Web Worker](https://developer.moz
 
 1. Create a file called `worker.js` in the `src` directory. This script will do all the heavy-lifing for us, including loading and running of the translation pipeline. To ensure the model is only loaded once, we will create the `MyTranslationPipeline` class which use the [singleton pattern](https://en.wikipedia.org/wiki/Singleton_pattern) to lazily create a single instance of the pipeline when `getInstance` is first called, and use this pipeline for all subsequent calls:
     ```javascript
-    import { pipeline } from '@xenova/transformers';
+    import { pipeline } from '@huggingface/transformers';
 
     class MyTranslationPipeline {
       static task = 'translation';
@@ -127,7 +127,7 @@ We recommend starting the development server again with `npm run dev`
 
 
 First, let's define our components. Create a folder called `components` in the `src` directory, and create the following files:
-1. `LanguageSelector.jsx`: This component will allow the user to select the input and output languages. Check out the full list of languages [here](https://github.com/xenova/transformers.js/blob/main/examples/react-translator/src/components/LanguageSelector.jsx).
+1. `LanguageSelector.jsx`: This component will allow the user to select the input and output languages. Check out the full list of languages [here](https://github.com/huggingface/transformers.js/blob/main/examples/react-translator/src/components/LanguageSelector.jsx).
     ```jsx
     const LANGUAGES = {
       "Acehnese (Arabic script)": "ace_Arab",
diff --git a/docs/source/tutorials/vanilla-js.md b/docs/source/tutorials/vanilla-js.md
index 7bc503006..58e336f12 100644
--- a/docs/source/tutorials/vanilla-js.md
+++ b/docs/source/tutorials/vanilla-js.md
@@ -10,7 +10,7 @@ Useful links:
 
 - [Demo site](https://huggingface.co/spaces/Scrimba/vanilla-js-object-detector)
 - [Interactive code walk-through (scrim)](https://scrimba.com/scrim/cKm9bDAg)
-- [Source code](https://github.com/xenova/transformers.js/tree/main/examples/vanilla-js)
+- [Source code](https://github.com/huggingface/transformers.js/tree/main/examples/vanilla-js)
 
 ## Step 1:  HTML and CSS setup
 
@@ -104,7 +104,7 @@ The `type="module"` attribute is important, as it turns our file into a [JavaScr
 Moving into `index.js`, let's import Transformers.js by adding the following line to the top of the file:
 
 ```js
-import { pipeline, env } from "https://cdn.jsdelivr.net/npm/@xenova/transformers@2.6.0";
+import { pipeline, env } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers";
 ```
 
 Since we will be downloading the model from the Hugging Face Hub, we can skip the local model check by setting:
diff --git a/examples/code-completion/src/App.jsx b/examples/code-completion/src/App.jsx
index a532f7299..7fc84f538 100644
--- a/examples/code-completion/src/App.jsx
+++ b/examples/code-completion/src/App.jsx
@@ -162,7 +162,7 @@ function App() {
       <div className="flex-grow sidebar p-4 flex flex-col overflow-y-auto">
         <h2 className="text-2xl font-semibold text-center mb-1">In-browser code completion</h2>
         <div className="text-center">
-          Made with&nbsp;<a className="text-white ital underline" href="https://github.com/xenova/transformers.js">🤗 Transformers.js</a>
+          Made with&nbsp;<a className="text-white ital underline" href="https://github.com/huggingface/transformers.js">🤗 Transformers.js</a>
         </div>
 
         <label className="mt-3">Model:</label>
@@ -241,7 +241,7 @@ function App() {
             <path fillRule="evenodd" d="M10 .333A9.911 9.911 0 0 0 6.866 19.65c.5.092.678-.215.678-.477 0-.237-.01-1.017-.014-1.845-2.757.6-3.338-1.169-3.338-1.169a2.627 2.627 0 0 0-1.1-1.451c-.9-.615.07-.6.07-.6a2.084 2.084 0 0 1 1.518 1.021 2.11 2.11 0 0 0 2.884.823c.044-.503.268-.973.63-1.325-2.2-.25-4.516-1.1-4.516-4.9A3.832 3.832 0 0 1 4.7 7.068a3.56 3.56 0 0 1 .095-2.623s.832-.266 2.726 1.016a9.409 9.409 0 0 1 4.962 0c1.89-1.282 2.717-1.016 2.717-1.016.366.83.402 1.768.1 2.623a3.827 3.827 0 0 1 1.02 2.659c0 3.807-2.319 4.644-4.525 4.889a2.366 2.366 0 0 1 .673 1.834c0 1.326-.012 2.394-.012 2.72 0 .263.18.572.681.475A9.911 9.911 0 0 0 10 .333Z" clipRule="evenodd" />
           </svg>
 
-          <a className="text-white font-normal underline underline-offset-1" href="https://github.com/xenova/transformers.js/tree/main/examples/code-completion">Source code</a>
+          <a className="text-white font-normal underline underline-offset-1" href="https://github.com/huggingface/transformers.js/tree/main/examples/code-completion">Source code</a>
         </div>
       </div>
     </div>
diff --git a/examples/demo-site/src/index.html b/examples/demo-site/src/index.html
index 9613acf63..49344c159 100644
--- a/examples/demo-site/src/index.html
+++ b/examples/demo-site/src/index.html
@@ -65,7 +65,7 @@ <h1 class="display-5 fw-bolder text-white mb-2">Transformers.js</h1>
               <div class="d-grid gap-3 d-sm-flex justify-content-sm-center">
                 <a class="btn btn-primary btn-lg px-4 me-sm-3"
                   href="https://huggingface.co/docs/transformers.js">Documentation</a>
-                <a class="btn btn-outline-light btn-lg px-4" href="https://github.com/xenova/transformers.js">
+                <a class="btn btn-outline-light btn-lg px-4" href="https://github.com/huggingface/transformers.js">
                   <i class="bi bi-github"></i> View Source
                 </a>
               </div>
diff --git a/examples/electron/README.md b/examples/electron/README.md
index 3d40e0c20..898801a12 100644
--- a/examples/electron/README.md
+++ b/examples/electron/README.md
@@ -6,7 +6,7 @@ An example project to show how to run 🤗 Transformers in an [Electron](https:/
 ## Getting Started
 1. Clone the repo and enter the project directory:
     ```bash
-    git clone https://github.com/xenova/transformers.js.git
+    git clone https://github.com/huggingface/transformers.js.git
     cd transformers.js/examples/electron/
     ```
 1. Install the necessary dependencies:
diff --git a/examples/extension/README.md b/examples/extension/README.md
index dfc81946f..4c4e0bceb 100644
--- a/examples/extension/README.md
+++ b/examples/extension/README.md
@@ -6,7 +6,7 @@ An example project to show how to run 🤗 Transformers in a browser extension.
 ## Getting Started
 1. Clone the repo and enter the project directory:
     ```bash
-    git clone https://github.com/xenova/transformers.js.git
+    git clone https://github.com/huggingface/transformers.js.git
     cd transformers.js/examples/extension/
     ```
 1. Install the necessary dependencies:
diff --git a/examples/florence2-webgpu/.eslintrc.cjs b/examples/florence2-webgpu/.eslintrc.cjs
new file mode 100644
index 000000000..3e212e1d4
--- /dev/null
+++ b/examples/florence2-webgpu/.eslintrc.cjs
@@ -0,0 +1,21 @@
+module.exports = {
+  root: true,
+  env: { browser: true, es2020: true },
+  extends: [
+    'eslint:recommended',
+    'plugin:react/recommended',
+    'plugin:react/jsx-runtime',
+    'plugin:react-hooks/recommended',
+  ],
+  ignorePatterns: ['dist', '.eslintrc.cjs'],
+  parserOptions: { ecmaVersion: 'latest', sourceType: 'module' },
+  settings: { react: { version: '18.2' } },
+  plugins: ['react-refresh'],
+  rules: {
+    'react/jsx-no-target-blank': 'off',
+    'react-refresh/only-export-components': [
+      'warn',
+      { allowConstantExport: true },
+    ],
+  },
+}
diff --git a/examples/florence2-webgpu/.gitignore b/examples/florence2-webgpu/.gitignore
new file mode 100644
index 000000000..a547bf36d
--- /dev/null
+++ b/examples/florence2-webgpu/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/florence2-webgpu/README.md b/examples/florence2-webgpu/README.md
new file mode 100644
index 000000000..f768e33fc
--- /dev/null
+++ b/examples/florence2-webgpu/README.md
@@ -0,0 +1,8 @@
+# React + Vite
+
+This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
+
+Currently, two official plugins are available:
+
+- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
+- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
diff --git a/examples/florence2-webgpu/index.html b/examples/florence2-webgpu/index.html
new file mode 100644
index 000000000..77f8f0a0c
--- /dev/null
+++ b/examples/florence2-webgpu/index.html
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Florence2 WebGPU</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.jsx"></script>
+  </body>
+</html>
diff --git a/examples/florence2-webgpu/package.json b/examples/florence2-webgpu/package.json
new file mode 100644
index 000000000..490ad589f
--- /dev/null
+++ b/examples/florence2-webgpu/package.json
@@ -0,0 +1,30 @@
+{
+  "name": "florence2-webgpu",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "@xenova/transformers": "github:xenova/transformers.js#v3",
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1"
+  },
+  "devDependencies": {
+    "@types/react": "^18.3.3",
+    "@types/react-dom": "^18.3.0",
+    "@vitejs/plugin-react": "^4.3.1",
+    "autoprefixer": "^10.4.19",
+    "eslint": "^8.57.0",
+    "eslint-plugin-react": "^7.34.2",
+    "eslint-plugin-react-hooks": "^4.6.2",
+    "eslint-plugin-react-refresh": "^0.4.7",
+    "postcss": "^8.4.38",
+    "tailwindcss": "^3.4.4",
+    "vite": "^5.3.1"
+  }
+}
diff --git a/examples/florence2-webgpu/postcss.config.js b/examples/florence2-webgpu/postcss.config.js
new file mode 100644
index 000000000..2e7af2b7f
--- /dev/null
+++ b/examples/florence2-webgpu/postcss.config.js
@@ -0,0 +1,6 @@
+export default {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+}
diff --git a/examples/florence2-webgpu/src/App.jsx b/examples/florence2-webgpu/src/App.jsx
new file mode 100644
index 000000000..36ac67e0f
--- /dev/null
+++ b/examples/florence2-webgpu/src/App.jsx
@@ -0,0 +1,218 @@
+import { useEffect, useState, useRef, useCallback } from 'react';
+
+import Progress from './components/Progress';
+import ImageInput from './components/ImageInput';
+
+const IS_WEBGPU_AVAILABLE = !!navigator.gpu;
+
+function App() {
+
+  // Create a reference to the worker object.
+  const worker = useRef(null);
+
+  // Model loading and progress
+  const [status, setStatus] = useState(null);
+  const [loadingMessage, setLoadingMessage] = useState('');
+  const [progressItems, setProgressItems] = useState([]);
+
+  const [task, setTask] = useState('<CAPTION>');
+  const [text, setText] = useState('');
+  const [image, setImage] = useState(null);
+  const [result, setResult] = useState(null);
+  const [time, setTime] = useState(null);
+
+  // We use the `useEffect` hook to setup the worker as soon as the `App` component is mounted.
+  useEffect(() => {
+    if (!worker.current) {
+      // Create the worker if it does not yet exist.
+      worker.current = new Worker(new URL('./worker.js', import.meta.url), {
+        type: 'module'
+      });
+    }
+
+    // Create a callback function for messages from the worker thread.
+    const onMessageReceived = (e) => {
+      switch (e.data.status) {
+        case 'loading':
+          // Model file start load: add a new progress item to the list.
+          setStatus('loading');
+          setLoadingMessage(e.data.data);
+          break;
+
+        case 'initiate':
+          setProgressItems(prev => [...prev, e.data]);
+          break;
+
+        case 'progress':
+          // Model file progress: update one of the progress items.
+          setProgressItems(
+            prev => prev.map(item => {
+              if (item.file === e.data.file) {
+                return { ...item, ...e.data }
+              }
+              return item;
+            })
+          );
+          break;
+
+        case 'done':
+          // Model file loaded: remove the progress item from the list.
+          setProgressItems(
+            prev => prev.filter(item => item.file !== e.data.file)
+          );
+          break;
+
+        case 'ready':
+          // Pipeline ready: the worker is ready to accept messages.
+          setStatus('ready');
+          break;
+
+        case 'complete':
+          setResult(e.data.result);
+          setTime(e.data.time);
+          setStatus('ready');
+          break;
+      }
+    };
+
+    // Attach the callback function as an event listener.
+    worker.current.addEventListener('message', onMessageReceived);
+
+    // Define a cleanup function for when the component is unmounted.
+    return () => {
+      worker.current.removeEventListener('message', onMessageReceived);
+    };
+  }, []);
+
+  const handleClick = useCallback(() => {
+    if (status === null) {
+      setStatus('loading');
+      worker.current.postMessage({ type: 'load' });
+    } else {
+      setStatus('running');
+      worker.current.postMessage({
+        type: 'run', data: { text, url: image, task }
+      });
+    }
+  }, [status, task, image, text]);
+
+  return (
+    IS_WEBGPU_AVAILABLE
+      ? (<div className="flex flex-col h-screen mx-auto items justify-end text-gray-800 dark:text-gray-200 bg-white dark:bg-gray-900 max-w-[630px]">
+
+        {status === 'loading' && (
+          <div className="flex justify-center items-center fixed w-screen h-screen bg-black z-10 bg-opacity-[92%] top-0 left-0">
+            <div className="w-[500px]">
+              <p className="text-center mb-1 text-white text-md">{loadingMessage}</p>
+              {progressItems.map(({ file, progress, total }, i) => (
+                <Progress key={i} text={file} percentage={progress} total={total} />
+              ))}
+            </div>
+          </div>
+        )}
+        <div className="h-full overflow-auto scrollbar-thin flex justify-center items-center flex-col relative">
+          <div className="flex flex-col items-center mb-1 text-center">
+            <h1 className="text-6xl font-bold mb-2">Florence2 WebGPU</h1>
+            <h2 className="text-xl font-semibold">Powerful vision foundation model running locally in your browser.</h2>
+          </div>
+
+          <div className="w-full min-h-[220px] flex flex-col justify-center items-center p-2">
+
+            <p className="mb-2">
+              You are about to download <a href="https://huggingface.co/onnx-community/Florence-2-base-ft" target="_blank" rel="noreferrer" className="font-medium underline">Florence-2-base-ft</a>,
+              a 230 million parameter vision foundation model that uses a prompt-based approach to handle a wide range of vision and vision-language tasks like captioning, object detection, and segmentation.
+              Once loaded, the model (340&nbsp;MB) will be cached and reused when you revisit the page.<br />
+              <br />
+              Everything runs locally in your browser using <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noreferrer" className="underline">🤗&nbsp;Transformers.js</a> and ONNX Runtime Web,
+              meaning no API calls are made to a server for inference. You can even disconnect from the internet after the model has loaded!
+            </p>
+
+            <div className="flex w-full justify-around m-4">
+              <div className="flex flex-col gap-2 w-full max-w-[48%]">
+                <div className="flex flex-col">
+                  <span className="text-sm mb-0.5">Task</span>
+                  <select
+                    className="border rounded-md p-1"
+                    value={task}
+                    onChange={(e) => setTask(e.target.value)}
+                  >
+                    <option value="<CAPTION>">Caption</option>
+                    <option value="<DETAILED_CAPTION>">Detailed Caption</option>
+                    <option value="<MORE_DETAILED_CAPTION>">More Detailed Caption</option>
+                    <option value="<OCR>">OCR</option>
+                    <option value="<OCR_WITH_REGION>">OCR with Region</option>
+                    <option value="<OD>">Object Detection</option>
+                    <option value="<DENSE_REGION_CAPTION>">Dense Region Caption</option>
+                    <option value="<CAPTION_TO_PHRASE_GROUNDING>">Caption to Phrase Grounding</option>
+                    {/* <option value="<REFERRING_EXPRESSION_SEGMENTATION>">Referring Expression Segmentation</option> */}
+                    {/* <option value="<REGION_TO_SEGMENTATION>">Region to Segmentation</option> */}
+                    {/* <option value="<OPEN_VOCABULARY_DETECTION>">Open Vocabulary Detection</option> */}
+                    {/* <option value="<REGION_TO_CATEGORY>">Region to Category</option> */}
+                    {/* <option value="<REGION_TO_DESCRIPTION>">Region to Description</option> */}
+                    {/* <option value="<REGION_TO_OCR>">Region to OCR</option> */}
+                    {/* <option value="<REGION_PROPOSAL>">Region Proposal</option> */}
+                  </select>
+                </div>
+                <div className="flex flex-col">
+                  <span className="text-sm mb-0.5">Input Image</span>
+                  <ImageInput className="flex flex-col items-center border border-gray-300 rounded-md cursor-pointer h-[250px]" onImageChange={(file, result) => {
+                    worker.current.postMessage({ type: 'reset' }); // Reset image cache
+                    setResult(null);
+                    setImage(result);
+                  }} />
+                </div>
+              </div>
+              <div className="flex flex-col gap-2 w-full max-w-[48%] justify-end">
+                {
+                  task === '<CAPTION_TO_PHRASE_GROUNDING>'
+                  && (<div className="flex flex-col">
+                    <span className="text-sm mb-0.5">Text input</span>
+                    <input className="border rounded-md px-2 py-[3.5px]"
+                      value={text}
+                      onChange={(e) => setText(e.target.value)}
+                    />
+                  </div>)
+                }
+
+                <div className="flex flex-col relative">
+                  <span className="text-sm mb-0.5">Output</span>
+                  <div className="flex justify-center border border-gray-300 rounded-md h-[250px]">
+                    {result?.[task] && (<>
+                      {
+                        typeof result[task] === 'string'
+                          ? <p className="pt-4 px-4 text-center max-h-[205px] overflow-y-auto">{result[task]}</p>
+                          : <pre className="w-full h-full p-2 overflow-y-auto">
+                            {JSON.stringify(result[task], null, 2)}
+                          </pre>
+                      }
+                      {
+                        time && <p className="text-sm text-gray-500 absolute bottom-2 bg-white p-1 rounded border">Execution time: {time.toFixed(2)} ms</p>
+                      }
+                    </>)
+                    }
+                  </div>
+
+                </div>
+              </div>
+            </div>
+
+            <button
+              className="border px-4 py-2 rounded-lg bg-blue-400 text-white hover:bg-blue-500 disabled:bg-blue-100 disabled:cursor-not-allowed select-none"
+              onClick={handleClick}
+              disabled={status === 'running' || (status !== null && image === null)}
+            >
+              {status === null ? 'Load model' :
+                status === 'running'
+                  ? 'Running...'
+                  : 'Run model'
+              }
+            </button>
+          </div>
+        </div>
+
+      </div >)
+      : (<div className="fixed w-screen h-screen bg-black z-10 bg-opacity-[92%] text-white text-2xl font-semibold flex justify-center items-center text-center">WebGPU is not supported<br />by this browser :&#40;</div>)
+  )
+}
+
+export default App
diff --git a/examples/florence2-webgpu/src/components/ImageInput.jsx b/examples/florence2-webgpu/src/components/ImageInput.jsx
new file mode 100644
index 000000000..9f24d9d5b
--- /dev/null
+++ b/examples/florence2-webgpu/src/components/ImageInput.jsx
@@ -0,0 +1,68 @@
+import { useState, useRef } from 'react';
+
+const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/beetle.png';
+
+const ImageInput = ({ onImageChange, ...props }) => {
+    const [imagePreview, setImagePreview] = useState(null);
+    const fileInputRef = useRef(null);
+
+    const readFile = (file) => {
+        if (!file) return;
+        const reader = new FileReader();
+        reader.onloadend = () => {
+            setImagePreview(reader.result);
+            if (onImageChange) {
+                onImageChange(file, reader.result);
+            }
+        };
+        reader.readAsDataURL(file);
+    }
+
+    const handleImageChange = (event) => {
+        readFile(event.target.files[0]);
+    };
+
+    const handleDragOver = (event) => {
+        event.preventDefault();
+    };
+
+    const handleDrop = (event) => {
+        event.preventDefault();
+        readFile(event.dataTransfer.files[0]);
+    };
+
+    const handleClick = () => {
+        fileInputRef.current.click();
+    };
+
+    return (
+        <div
+            {...props}
+            onClick={handleClick}
+            onDragOver={handleDragOver}
+            onDrop={handleDrop}
+        >
+            <input
+                type="file"
+                accept="image/*"
+                onChange={handleImageChange}
+                ref={fileInputRef}
+                className="hidden"
+            />
+            {imagePreview ? (
+                <img src={imagePreview} alt="Selected" className="w-full max-h-[250px] h-full object-contain rounded-md" />
+            ) : (
+                <div className="w-full h-full flex flex-col items-center justify-center border-2 border-dashed border-gray-300 rounded-md">
+                    <span className="text-gray-600 text-center m-3"><u>Drag & drop</u> or <u>click</u><br />to select an image</span>
+                    <span className="text-gray-500 text-sm hover:text-gray-800" onClick={(e) => {
+                        e.stopPropagation();
+                        setImagePreview(EXAMPLE_URL);
+                        onImageChange(null, EXAMPLE_URL);
+                    }}>(or <u>try an example</u>)</span>
+                </div>
+            )}
+        </div>
+    );
+};
+
+export default ImageInput;
diff --git a/examples/florence2-webgpu/src/components/Progress.jsx b/examples/florence2-webgpu/src/components/Progress.jsx
new file mode 100644
index 000000000..9ce024cc8
--- /dev/null
+++ b/examples/florence2-webgpu/src/components/Progress.jsx
@@ -0,0 +1,15 @@
+function formatBytes(size) {
+    const i = size == 0 ? 0 : Math.floor(Math.log(size) / Math.log(1024));
+    return +((size / Math.pow(1024, i)).toFixed(2)) * 1 + ['B', 'kB', 'MB', 'GB', 'TB'][i];
+}
+
+export default function Progress({ text, percentage, total }) {
+    percentage ??= 0;
+    return (
+        <div className="w-full bg-gray-100 dark:bg-gray-700 text-left rounded-lg overflow-hidden mb-0.5">
+            <div className="bg-blue-400 whitespace-nowrap px-1 text-sm" style={{ width: `${percentage}%` }}>
+                {text} ({percentage.toFixed(2)}%{isNaN(total) ? '' : ` of ${formatBytes(total)}`})
+            </div>
+        </div>
+    );
+}
diff --git a/examples/florence2-webgpu/src/index.css b/examples/florence2-webgpu/src/index.css
new file mode 100644
index 000000000..c4a1285e0
--- /dev/null
+++ b/examples/florence2-webgpu/src/index.css
@@ -0,0 +1,21 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+
+@layer utilities {
+  .scrollbar-thin::-webkit-scrollbar {
+    @apply w-2;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-track {
+    @apply rounded-full bg-gray-100 dark:bg-gray-700;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-thumb {
+    @apply rounded-full bg-gray-300 dark:bg-gray-600;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-thumb:hover {
+    @apply bg-gray-500;
+  }
+}
diff --git a/examples/florence2-webgpu/src/main.jsx b/examples/florence2-webgpu/src/main.jsx
new file mode 100644
index 000000000..54b39dd1d
--- /dev/null
+++ b/examples/florence2-webgpu/src/main.jsx
@@ -0,0 +1,10 @@
+import React from 'react'
+import ReactDOM from 'react-dom/client'
+import App from './App.jsx'
+import './index.css'
+
+ReactDOM.createRoot(document.getElementById('root')).render(
+  <React.StrictMode>
+    <App />
+  </React.StrictMode>,
+)
diff --git a/examples/florence2-webgpu/src/worker.js b/examples/florence2-webgpu/src/worker.js
new file mode 100644
index 000000000..92c1732f4
--- /dev/null
+++ b/examples/florence2-webgpu/src/worker.js
@@ -0,0 +1,140 @@
+
+import {
+    Florence2ForConditionalGeneration,
+    AutoProcessor,
+    AutoTokenizer,
+    RawImage,
+    full,
+} from '@xenova/transformers';
+
+async function hasFp16() {
+    try {
+        const adapter = await navigator.gpu.requestAdapter();
+        return adapter.features.has('shader-f16');
+    } catch (e) {
+        return false;
+    }
+}
+
+/**
+ * This class uses the Singleton pattern to ensure that only one instance of the model is loaded.
+ */
+class Florence2Singleton {
+    static model_id = 'onnx-community/Florence-2-base-ft';
+
+    static async getInstance(progress_callback = null) {
+        this.processor ??= AutoProcessor.from_pretrained(this.model_id);
+        this.tokenizer ??= AutoTokenizer.from_pretrained(this.model_id);
+
+        this.supports_fp16 ??= await hasFp16();
+        this.model ??= Florence2ForConditionalGeneration.from_pretrained(this.model_id, {
+            dtype: {
+                embed_tokens: this.supports_fp16 ? 'fp16' : 'fp32',
+                vision_encoder: this.supports_fp16 ? 'fp16' : 'fp32',
+                encoder_model: 'q4', // or 'fp16' or 'fp32'
+                decoder_model_merged: 'q4', // or 'fp16' or 'fp32'
+            },
+            device: 'webgpu',
+            progress_callback,
+        });
+
+        return Promise.all([this.model, this.tokenizer, this.processor]);
+    }
+}
+
+
+async function load() {
+    self.postMessage({
+        status: 'loading',
+        data: 'Loading model...'
+    });
+
+    // Load the pipeline and save it for future use.
+    const [model, tokenizer, processor] = await Florence2Singleton.getInstance(x => {
+        // We also add a progress callback to the pipeline so that we can
+        // track model loading.
+        self.postMessage(x);
+    });
+
+    self.postMessage({
+        status: 'loading',
+        data: 'Compiling shaders and warming up model...'
+    });
+
+    // Dummy text and vision inputs
+    const text_inputs = tokenizer('a');
+    const pixel_values = full([1, 3, 768, 768], 0.0);
+
+    // Run model with dummy input to compile shaders
+    await model.generate({
+        ...text_inputs,
+        pixel_values,
+        max_new_tokens: 1,
+    });
+
+    self.postMessage({ status: 'ready' });
+}
+
+const TASKS_WITH_INPUTS = [
+    '<CAPTION_TO_PHRASE_GROUNDING>',
+]
+
+let vision_inputs;
+let image_size;
+async function run({ text, url, task }) {
+    const [model, tokenizer, processor] = await Florence2Singleton.getInstance();
+
+    // Read and preprocess image
+    const start = performance.now();
+    if (!vision_inputs) {
+        // Cache vision inputs when possible
+        const image = await RawImage.fromURL(url);
+        image_size = image.size;
+        vision_inputs = await processor(image);
+    }
+
+    let user_input = task;
+    if (TASKS_WITH_INPUTS.includes(task) && text) {
+        user_input += text;
+    }
+    const prompts = processor.construct_prompts(user_input);
+    const text_inputs = tokenizer(prompts);
+
+    // Generate text
+    const generated_ids = await model.generate({
+        ...text_inputs,
+        ...vision_inputs,
+        max_new_tokens: 128,
+        num_beams: 1,
+        do_sample: false,
+    });
+
+    // Decode generated text
+    const generated_text = tokenizer.batch_decode(generated_ids, { skip_special_tokens: false })[0];
+
+    // Post-process the generated text
+    const result = processor.post_process_generation(generated_text, task, image_size);
+
+    const end = performance.now();
+
+    self.postMessage({ status: 'complete', result, time: end - start });
+}
+
+// Listen for messages from the main thread
+self.addEventListener('message', async (e) => {
+    const { type, data } = e.data;
+
+    switch (type) {
+        case 'load':
+            load();
+            break;
+
+        case 'run':
+            run(data);
+            break;
+
+        case 'reset':
+            vision_inputs = image_size = null;
+            break;
+    }
+});
diff --git a/examples/florence2-webgpu/tailwind.config.js b/examples/florence2-webgpu/tailwind.config.js
new file mode 100644
index 000000000..d37737fc0
--- /dev/null
+++ b/examples/florence2-webgpu/tailwind.config.js
@@ -0,0 +1,12 @@
+/** @type {import('tailwindcss').Config} */
+export default {
+  content: [
+    "./index.html",
+    "./src/**/*.{js,ts,jsx,tsx}",
+  ],
+  theme: {
+    extend: {},
+  },
+  plugins: [],
+}
+
diff --git a/examples/florence2-webgpu/vite.config.js b/examples/florence2-webgpu/vite.config.js
new file mode 100644
index 000000000..5a33944a9
--- /dev/null
+++ b/examples/florence2-webgpu/vite.config.js
@@ -0,0 +1,7 @@
+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+
+// https://vitejs.dev/config/
+export default defineConfig({
+  plugins: [react()],
+})
diff --git a/examples/musicgen-web/.eslintrc.cjs b/examples/musicgen-web/.eslintrc.cjs
new file mode 100644
index 000000000..3e212e1d4
--- /dev/null
+++ b/examples/musicgen-web/.eslintrc.cjs
@@ -0,0 +1,21 @@
+module.exports = {
+  root: true,
+  env: { browser: true, es2020: true },
+  extends: [
+    'eslint:recommended',
+    'plugin:react/recommended',
+    'plugin:react/jsx-runtime',
+    'plugin:react-hooks/recommended',
+  ],
+  ignorePatterns: ['dist', '.eslintrc.cjs'],
+  parserOptions: { ecmaVersion: 'latest', sourceType: 'module' },
+  settings: { react: { version: '18.2' } },
+  plugins: ['react-refresh'],
+  rules: {
+    'react/jsx-no-target-blank': 'off',
+    'react-refresh/only-export-components': [
+      'warn',
+      { allowConstantExport: true },
+    ],
+  },
+}
diff --git a/examples/musicgen-web/.gitignore b/examples/musicgen-web/.gitignore
new file mode 100644
index 000000000..a547bf36d
--- /dev/null
+++ b/examples/musicgen-web/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/musicgen-web/README.md b/examples/musicgen-web/README.md
new file mode 100644
index 000000000..f768e33fc
--- /dev/null
+++ b/examples/musicgen-web/README.md
@@ -0,0 +1,8 @@
+# React + Vite
+
+This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
+
+Currently, two official plugins are available:
+
+- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
+- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
diff --git a/examples/musicgen-web/index.html b/examples/musicgen-web/index.html
new file mode 100644
index 000000000..cad1bcd1a
--- /dev/null
+++ b/examples/musicgen-web/index.html
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>MusicGen Web | In-browser text-to-music w/ 🤗 Transformers.js!</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.jsx"></script>
+  </body>
+</html>
diff --git a/examples/musicgen-web/package.json b/examples/musicgen-web/package.json
new file mode 100644
index 000000000..0175494d7
--- /dev/null
+++ b/examples/musicgen-web/package.json
@@ -0,0 +1,30 @@
+{
+  "name": "musicgen-web",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "@xenova/transformers": "github:xenova/transformers.js#v3",
+    "react": "^18.2.0",
+    "react-dom": "^18.2.0"
+  },
+  "devDependencies": {
+    "@types/react": "^18.2.66",
+    "@types/react-dom": "^18.2.22",
+    "@vitejs/plugin-react": "^4.2.1",
+    "autoprefixer": "^10.4.19",
+    "eslint": "^8.57.0",
+    "eslint-plugin-react": "^7.34.1",
+    "eslint-plugin-react-hooks": "^4.6.0",
+    "eslint-plugin-react-refresh": "^0.4.6",
+    "postcss": "^8.4.38",
+    "tailwindcss": "^3.4.3",
+    "vite": "^5.2.0"
+  }
+}
diff --git a/examples/musicgen-web/postcss.config.js b/examples/musicgen-web/postcss.config.js
new file mode 100644
index 000000000..2e7af2b7f
--- /dev/null
+++ b/examples/musicgen-web/postcss.config.js
@@ -0,0 +1,6 @@
+export default {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+}
diff --git a/examples/musicgen-web/src/App.css b/examples/musicgen-web/src/App.css
new file mode 100644
index 000000000..91ab868f6
--- /dev/null
+++ b/examples/musicgen-web/src/App.css
@@ -0,0 +1,9 @@
+#root {
+  max-width: 960px;
+  height: 100vh;
+  margin: 0 auto;
+  text-align: center;
+  display: flex;
+  justify-content: center;
+  align-items: center;
+}
diff --git a/examples/musicgen-web/src/App.jsx b/examples/musicgen-web/src/App.jsx
new file mode 100644
index 000000000..a64e8b655
--- /dev/null
+++ b/examples/musicgen-web/src/App.jsx
@@ -0,0 +1,229 @@
+import { useEffect, useState, useRef } from 'react';
+import { AutoTokenizer, MusicgenForConditionalGeneration, BaseStreamer } from '@xenova/transformers';
+import { encodeWAV, share } from './utils.js';
+
+import './App.css';
+
+const MODEL_ID = 'Xenova/musicgen-small';
+
+// Adapted from https://huggingface.co/spaces/facebook/MusicGen
+const EXAMPLES = [
+  '80s pop track with bassy drums and synth',
+  '90s rock song with loud guitars and heavy drums',
+  'a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions bpm: 130',
+  'A cheerful country song with acoustic guitars',
+  'lofi slow bpm electro chill with organic samples',
+];
+
+// Enable sharing if running on Hugging Face Spaces
+const SHARING_ENABLED = window.location.host.endsWith('.hf.space');
+
+// Streamer to update progress
+class CallbackStreamer extends BaseStreamer {
+  constructor(callback_fn) {
+    super();
+    this.callback_fn = callback_fn;
+  }
+
+  put(value) {
+    return this.callback_fn(value);
+  }
+
+  end() {
+    return this.callback_fn();
+  }
+}
+
+// Main App component
+const App = () => {
+  // Input/output state
+  const [textInput, setTextInput] = useState(EXAMPLES[0]);
+  const [progress, setProgress] = useState(0);
+  const [loadProgress, setLoadProgress] = useState({});
+  const [statusText, setStatusText] = useState('Loading model (656MB)...');
+  const [result, setResult] = useState(null);
+  const audioRef = useRef(null);
+
+  // Model and tokenizer references
+  const modelPromise = useRef(null);
+  const tokenizerPromise = useRef(null);
+
+  // Generation parameters
+  const [guidance_scale, setGuidanceScale] = useState(3);
+  const [temperature, setTemperature] = useState(1);
+  const [duration, setDuration] = useState(10);
+
+  // Load model and tokenizer on first render
+  useEffect(() => {
+    modelPromise.current ??= MusicgenForConditionalGeneration.from_pretrained(MODEL_ID, {
+      progress_callback: (data) => {
+        if (data.status !== 'progress') return;
+        setLoadProgress(prev => ({ ...prev, [data.file]: data }))
+      },
+      dtype: {
+        text_encoder: 'q8',
+        decoder_model_merged: 'q8',
+        encodec_decode: 'fp32',
+      },
+      device: 'wasm',
+    });
+
+    tokenizerPromise.current ??= AutoTokenizer.from_pretrained(MODEL_ID);
+  }, []);
+
+  // Update progress bar based on load progress
+  useEffect(() => {
+    const items = Object.values(loadProgress);
+    if (items.length !== 5) return; // 5 files to load
+    let loaded = 0;
+    let total = 0;
+    for (const data of Object.values(loadProgress)) {
+      loaded += data.loaded;
+      total += data.total;
+    }
+    const progress = loaded / total;
+    setProgress(progress);
+    setStatusText(progress === 1
+      ? 'Ready!'
+      : `Loading model (${(progress * 100).toFixed()}% of 656MB)...`
+    );
+  }, [loadProgress]);
+
+  // Function to handle generating music
+  const generateMusic = async () => {
+    // Reset audio player and result
+    audioRef.current.src = '';
+    setResult(null);
+
+    // Get model and tokenizer
+    const tokenizer = await tokenizerPromise.current;
+    const model = await modelPromise.current;
+
+    // Get number of tokens to match user-specified duration (more intuitive for user)
+    // 503 tokens -> 10 seconds generated => ~50 tokens per second
+    // https://huggingface.co/docs/transformers/model_doc/musicgen#generation
+    const max_length = Math.min(
+      Math.max(Math.floor(duration * 50), 1) + 4,
+      model.generation_config.max_length ?? 1500,
+    );
+
+    // Create a streamer to update progress
+    let num_tokens = 0;
+    const streamer = new CallbackStreamer((value) => {
+      const percent = value === undefined ? 1 : ++num_tokens / max_length;
+      setStatusText(`Generating (${(percent * 100).toFixed()}%)...`);
+      setProgress(percent);
+    });
+
+    // Tokenize input text
+    const inputs = tokenizer(textInput);
+
+    // Generate music
+    const audio_values = await model.generate({
+      // Inputs
+      ...inputs,
+
+      // Generation parameters
+      max_length,
+      guidance_scale,
+      temperature,
+
+      // Outputs
+      streamer,
+    });
+
+    setStatusText('Encoding audio...');
+
+    // Encode audio values to WAV
+    const sampling_rate = model.config.audio_encoder.sampling_rate;
+    const wav = encodeWAV(audio_values.data, sampling_rate);
+    const blob = new Blob([wav], { type: 'audio/wav' });
+    setResult(blob);
+
+    audioRef.current.src = URL.createObjectURL(blob);
+    setStatusText('Done!');
+  };
+
+  return (
+    <div className="container mx-auto p-8">
+      <h1 className="text-5xl font-bold mb-2">MusicGen Web</h1>
+      <h2 className="text-2xl font-semibold mb-4">In-browser text-to-music w/ <a className="underline" href="https://github.com/huggingface/transformers.js">🤗 Transformers.js!</a>
+      </h2>
+
+      {/* Text input for user */}
+      <input
+        type="text"
+        placeholder="Describe the music to generate..."
+        value={textInput}
+        onChange={(e) => setTextInput(e.target.value)}
+        className="border border-gray-300 p-2 mb-4 w-full rounded"
+      />
+
+      {/* Example buttons */}
+      <div className="mb-4 flex gap-2 justify-center text-sm">
+        {EXAMPLES.map((example, i) => (
+          <button key={i} className="bg-blue-500 hover:bg-blue-400 transition-colors duration-100 text-white px-2 py-2 rounded" onClick={(e) => setTextInput(e.target.innerText)}>{example}</button>
+        ))}
+      </div>
+
+      {/* Generation parameters */}
+      <div className="flex mb-4 justify-center gap-2">
+        {/* Duration */}
+        <div>
+          <label className="block text-sm font-semibold mb-1">Duration</label>
+          <input type="range" min={1} max={30} value={duration} onChange={(e) => setDuration(e.target.value)} />
+          <p className="text-sm text-center">{`${duration} second${duration > 1 ? 's' : ''}`}</p>
+        </div>
+
+        {/* Guidance Scale */}
+        <div className="mr-4">
+          <label className="block text-sm font-semibold mb-1">Guidance Scale</label>
+          <input type="range" min={1} max={10} value={guidance_scale} onChange={(e) => setGuidanceScale(e.target.value)} />
+          <p className="text-sm text-center">{guidance_scale}</p>
+        </div>
+
+        {/* Temperature */}
+        <div>
+          <label className="block text-sm font-semibold mb-1">Temperature</label>
+          <input type="range" min={0.1} max={2} step={0.1} value={temperature} onChange={(e) => setTemperature(e.target.value)} />
+          <p className="text-sm text-center">{temperature}</p>
+        </div>
+      </div>
+
+      {/* Button to generate music */}
+      <button className="mb-4 bg-green-500 hover:bg-green-400 transition-colors duration-100 text-white px-4 py-3 rounded-lg font-semibold" onClick={generateMusic}>Generate Music</button>
+
+      {/* Progress bar */}
+      <div className="mb-4">
+        <div className="bg-gray-200 h-4 w-full rounded-full">
+          <div className="bg-blue-500 h-4 rounded-full" style={{ width: `${100 * progress}%` }}></div>
+        </div>
+        <p className="text-sm text-center mt-1">{statusText}</p>
+      </div>
+
+      {/* Audio player */}
+      {<div className="flex justify-center flex-col items-center">
+        <audio ref={audioRef} controls type="audio/wav" />
+        {SHARING_ENABLED && result &&
+          <button
+            className="bg-red-500 hover:bg-red-400 transition-colors duration-100 text-white px-2 py-1 my-2 rounded-lg text-sm"
+            onClick={async (e) => {
+              e.target.disabled = true;
+              e.target.innerText = 'Uploading...';
+              await share(result, {
+                prompt: textInput,
+                duration,
+                guidance_scale,
+                temperature,
+              });
+              e.target.disabled = false;
+              e.target.innerText = 'Share';
+            }
+            }>Share</button>
+        }
+      </div>}
+    </div>
+  );
+};
+
+export default App;
diff --git a/examples/musicgen-web/src/index.css b/examples/musicgen-web/src/index.css
new file mode 100644
index 000000000..bd6213e1d
--- /dev/null
+++ b/examples/musicgen-web/src/index.css
@@ -0,0 +1,3 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
\ No newline at end of file
diff --git a/examples/musicgen-web/src/main.jsx b/examples/musicgen-web/src/main.jsx
new file mode 100644
index 000000000..54b39dd1d
--- /dev/null
+++ b/examples/musicgen-web/src/main.jsx
@@ -0,0 +1,10 @@
+import React from 'react'
+import ReactDOM from 'react-dom/client'
+import App from './App.jsx'
+import './index.css'
+
+ReactDOM.createRoot(document.getElementById('root')).render(
+  <React.StrictMode>
+    <App />
+  </React.StrictMode>,
+)
diff --git a/examples/musicgen-web/src/utils.js b/examples/musicgen-web/src/utils.js
new file mode 100644
index 000000000..436c9daab
--- /dev/null
+++ b/examples/musicgen-web/src/utils.js
@@ -0,0 +1,59 @@
+
+// Adapted from https://www.npmjs.com/package/audiobuffer-to-wav
+export function encodeWAV(samples, sampleRate = 16000) {
+    let offset = 44;
+    const buffer = new ArrayBuffer(offset + samples.length * 4);
+    const view = new DataView(buffer);
+
+    /* RIFF identifier */
+    writeString(view, 0, 'RIFF')
+    /* RIFF chunk length */
+    view.setUint32(4, 36 + samples.length * 4, true)
+    /* RIFF type */
+    writeString(view, 8, 'WAVE')
+    /* format chunk identifier */
+    writeString(view, 12, 'fmt ')
+    /* format chunk length */
+    view.setUint32(16, 16, true)
+    /* sample format (raw) */
+    view.setUint16(20, 3, true)
+    /* channel count */
+    view.setUint16(22, 1, true)
+    /* sample rate */
+    view.setUint32(24, sampleRate, true)
+    /* byte rate (sample rate * block align) */
+    view.setUint32(28, sampleRate * 4, true)
+    /* block align (channel count * bytes per sample) */
+    view.setUint16(32, 4, true)
+    /* bits per sample */
+    view.setUint16(34, 32, true)
+    /* data chunk identifier */
+    writeString(view, 36, 'data')
+    /* data chunk length */
+    view.setUint32(40, samples.length * 4, true)
+
+    for (let i = 0; i < samples.length; ++i, offset += 4) {
+        view.setFloat32(offset, samples[i], true)
+    }
+
+    return buffer
+}
+function writeString(view, offset, string) {
+    for (let i = 0; i < string.length; ++i) {
+        view.setUint8(offset + i, string.charCodeAt(i))
+    }
+}
+
+export async function share(body, settings) {
+    const response = await fetch('https://huggingface.co/uploads', { method: 'POST', body });
+    if (!response.ok) throw new Error(`Failed to upload audio: ${response.statusText}`);
+    const url = await response.text();
+
+    const params = new URLSearchParams({
+        title: `🎵 ${settings.prompt}`,
+        description: `<audio controls src="${url}"></audio>\n${JSON.stringify(settings, null, 2)}`,
+    });
+
+    const shareURL = `https://huggingface.co/spaces/Xenova/musicgen-web/discussions/new?${params.toString()}`;
+    window.open(shareURL, '_blank');
+}
\ No newline at end of file
diff --git a/examples/musicgen-web/tailwind.config.js b/examples/musicgen-web/tailwind.config.js
new file mode 100644
index 000000000..d37737fc0
--- /dev/null
+++ b/examples/musicgen-web/tailwind.config.js
@@ -0,0 +1,12 @@
+/** @type {import('tailwindcss').Config} */
+export default {
+  content: [
+    "./index.html",
+    "./src/**/*.{js,ts,jsx,tsx}",
+  ],
+  theme: {
+    extend: {},
+  },
+  plugins: [],
+}
+
diff --git a/examples/musicgen-web/vite.config.js b/examples/musicgen-web/vite.config.js
new file mode 100644
index 000000000..5a33944a9
--- /dev/null
+++ b/examples/musicgen-web/vite.config.js
@@ -0,0 +1,7 @@
+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+
+// https://vitejs.dev/config/
+export default defineConfig({
+  plugins: [react()],
+})
diff --git a/examples/next-client/package-lock.json b/examples/next-client/package-lock.json
index 2a76aef90..d03dc91e7 100644
--- a/examples/next-client/package-lock.json
+++ b/examples/next-client/package-lock.json
@@ -8,7 +8,7 @@
       "name": "next",
       "version": "0.1.0",
       "dependencies": {
-        "@xenova/transformers": "^2.4.2",
+        "@huggingface/transformers": "^3.0.0-alpha.5",
         "autoprefixer": "10.4.14",
         "eslint": "8.45.0",
         "eslint-config-next": "13.4.12",
@@ -49,6 +49,15 @@
         "node": ">=6.9.0"
       }
     },
+    "node_modules/@emnapi/runtime": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.2.0.tgz",
+      "integrity": "sha512-bV21/9LQmcQeCPEg3BDFtvwL6cwiTMksYNWQQ4KOxCZikEGalWtenoZ0wCiukJINlGCIi2KXx01g4FoH/LxpzQ==",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
     "node_modules/@eslint-community/eslint-utils": {
       "version": "4.4.0",
       "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz",
@@ -57,79 +66,663 @@
         "eslint-visitor-keys": "^3.3.0"
       },
       "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+      },
+      "peerDependencies": {
+        "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0"
+      }
+    },
+    "node_modules/@eslint-community/regexpp": {
+      "version": "4.6.0",
+      "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.6.0.tgz",
+      "integrity": "sha512-uiPeRISaglZnaZk8vwrjQZ1CxogZeY/4IYft6gBOTqu1WhVXWmCmZMWxUv2Q/pxSvPdp1JPaO62kLOcOkMqWrw==",
+      "engines": {
+        "node": "^12.0.0 || ^14.0.0 || >=16.0.0"
+      }
+    },
+    "node_modules/@eslint/eslintrc": {
+      "version": "2.1.0",
+      "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.1.0.tgz",
+      "integrity": "sha512-Lj7DECXqIVCqnqjjHMPna4vn6GJcMgul/wuS0je9OZ9gsL0zzDpKPVtcG1HaDVc+9y+qgXneTeUMbCqXJNpH1A==",
+      "dependencies": {
+        "ajv": "^6.12.4",
+        "debug": "^4.3.2",
+        "espree": "^9.6.0",
+        "globals": "^13.19.0",
+        "ignore": "^5.2.0",
+        "import-fresh": "^3.2.1",
+        "js-yaml": "^4.1.0",
+        "minimatch": "^3.1.2",
+        "strip-json-comments": "^3.1.1"
+      },
+      "engines": {
+        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/eslint"
+      }
+    },
+    "node_modules/@eslint/js": {
+      "version": "8.44.0",
+      "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.44.0.tgz",
+      "integrity": "sha512-Ag+9YM4ocKQx9AarydN0KY2j0ErMHNIocPDrVo8zAE44xLTjEtz81OdR68/cydGtk6m6jDb5Za3r2useMzYmSw==",
+      "engines": {
+        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+      }
+    },
+    "node_modules/@huggingface/jinja": {
+      "version": "0.3.0",
+      "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.3.0.tgz",
+      "integrity": "sha512-GLJzso0M07ZncFkrJMIXVU4os6GFbPocD4g8fMQPMGJubf48FtGOsUORH2rtFdXPIPelz8SLBMn8ZRmOTwXm9Q==",
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/@huggingface/transformers": {
+      "version": "3.0.0-alpha.5",
+      "resolved": "https://registry.npmjs.org/@huggingface/transformers/-/transformers-3.0.0-alpha.5.tgz",
+      "integrity": "sha512-GFJ3YfOq+Ax1LvDECOhvLay0sqCbkE1q3roloRYrYoflOUY+YX1A5ez+hfmDyN65blC7eFf4UQ9yWHmyKBkBiw==",
+      "dependencies": {
+        "@huggingface/jinja": "^0.3.0",
+        "onnxruntime-node": "1.18.0",
+        "onnxruntime-web": "1.19.0-dev.20240804-ee2fe87e2d",
+        "sharp": "^0.33.2"
+      }
+    },
+    "node_modules/@huggingface/transformers/node_modules/long": {
+      "version": "5.2.3",
+      "resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz",
+      "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q=="
+    },
+    "node_modules/@huggingface/transformers/node_modules/onnxruntime-common": {
+      "version": "1.18.0",
+      "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.18.0.tgz",
+      "integrity": "sha512-lufrSzX6QdKrktAELG5x5VkBpapbCeS3dQwrXbN0eD9rHvU0yAWl7Ztju9FvgAKWvwd/teEKJNj3OwM6eTZh3Q=="
+    },
+    "node_modules/@huggingface/transformers/node_modules/onnxruntime-node": {
+      "version": "1.18.0",
+      "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.18.0.tgz",
+      "integrity": "sha512-iTnFcxKpmywCatx8ov4GTbECe3tJk2Bp1OA2mWRJde78q+7tpPYBhKMnwhlaoKy9oKQcy4UoEuuhoy2PSD13ww==",
+      "hasInstallScript": true,
+      "os": [
+        "win32",
+        "darwin",
+        "linux"
+      ],
+      "dependencies": {
+        "onnxruntime-common": "1.18.0",
+        "tar": "^7.0.1"
+      }
+    },
+    "node_modules/@huggingface/transformers/node_modules/onnxruntime-web": {
+      "version": "1.19.0-dev.20240804-ee2fe87e2d",
+      "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.19.0-dev.20240804-ee2fe87e2d.tgz",
+      "integrity": "sha512-uz93GKeBjHHq0150qIAxGGMhf5YLnfh12OChvYyLG2H6LzXymXhorvcxV7sklofw6fVooL3IutMz8nbZLMQxYg==",
+      "dependencies": {
+        "flatbuffers": "^1.12.0",
+        "guid-typescript": "^1.0.9",
+        "long": "^5.2.3",
+        "onnxruntime-common": "1.19.0-dev.20240730-530a2d7b41",
+        "platform": "^1.3.6",
+        "protobufjs": "^7.2.4"
+      }
+    },
+    "node_modules/@huggingface/transformers/node_modules/onnxruntime-web/node_modules/onnxruntime-common": {
+      "version": "1.19.0-dev.20240730-530a2d7b41",
+      "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.19.0-dev.20240730-530a2d7b41.tgz",
+      "integrity": "sha512-fWyg0USjvdHY5JL+3y/fXUDTOl9OLfhrX+sttfM2LW7jT/O8VNxjc16oAjyJHJruOQdrH2qo+KnxjOLA68i2dw=="
+    },
+    "node_modules/@huggingface/transformers/node_modules/sharp": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.33.4.tgz",
+      "integrity": "sha512-7i/dt5kGl7qR4gwPRD2biwD2/SvBn3O04J77XKFgL2OnZtQw+AG9wnuS/csmu80nPRHLYE9E41fyEiG8nhH6/Q==",
+      "hasInstallScript": true,
+      "dependencies": {
+        "color": "^4.2.3",
+        "detect-libc": "^2.0.3",
+        "semver": "^7.6.0"
+      },
+      "engines": {
+        "libvips": ">=8.15.2",
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-darwin-arm64": "0.33.4",
+        "@img/sharp-darwin-x64": "0.33.4",
+        "@img/sharp-libvips-darwin-arm64": "1.0.2",
+        "@img/sharp-libvips-darwin-x64": "1.0.2",
+        "@img/sharp-libvips-linux-arm": "1.0.2",
+        "@img/sharp-libvips-linux-arm64": "1.0.2",
+        "@img/sharp-libvips-linux-s390x": "1.0.2",
+        "@img/sharp-libvips-linux-x64": "1.0.2",
+        "@img/sharp-libvips-linuxmusl-arm64": "1.0.2",
+        "@img/sharp-libvips-linuxmusl-x64": "1.0.2",
+        "@img/sharp-linux-arm": "0.33.4",
+        "@img/sharp-linux-arm64": "0.33.4",
+        "@img/sharp-linux-s390x": "0.33.4",
+        "@img/sharp-linux-x64": "0.33.4",
+        "@img/sharp-linuxmusl-arm64": "0.33.4",
+        "@img/sharp-linuxmusl-x64": "0.33.4",
+        "@img/sharp-wasm32": "0.33.4",
+        "@img/sharp-win32-ia32": "0.33.4",
+        "@img/sharp-win32-x64": "0.33.4"
+      }
+    },
+    "node_modules/@humanwhocodes/config-array": {
+      "version": "0.11.10",
+      "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.10.tgz",
+      "integrity": "sha512-KVVjQmNUepDVGXNuoRRdmmEjruj0KfiGSbS8LVc12LMsWDQzRXJ0qdhN8L8uUigKpfEHRhlaQFY0ib1tnUbNeQ==",
+      "dependencies": {
+        "@humanwhocodes/object-schema": "^1.2.1",
+        "debug": "^4.1.1",
+        "minimatch": "^3.0.5"
+      },
+      "engines": {
+        "node": ">=10.10.0"
+      }
+    },
+    "node_modules/@humanwhocodes/module-importer": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz",
+      "integrity": "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==",
+      "engines": {
+        "node": ">=12.22"
+      },
+      "funding": {
+        "type": "github",
+        "url": "https://github.com/sponsors/nzakas"
+      }
+    },
+    "node_modules/@humanwhocodes/object-schema": {
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-1.2.1.tgz",
+      "integrity": "sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA=="
+    },
+    "node_modules/@img/sharp-darwin-arm64": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.33.4.tgz",
+      "integrity": "sha512-p0suNqXufJs9t3RqLBO6vvrgr5OhgbWp76s5gTRvdmxmuv9E1rcaqGUsl3l4mKVmXPkTkTErXediAui4x+8PSA==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "glibc": ">=2.26",
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-darwin-arm64": "1.0.2"
+      }
+    },
+    "node_modules/@img/sharp-darwin-x64": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.33.4.tgz",
+      "integrity": "sha512-0l7yRObwtTi82Z6ebVI2PnHT8EB2NxBgpK2MiKJZJ7cz32R4lxd001ecMhzzsZig3Yv9oclvqqdV93jo9hy+Dw==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "glibc": ">=2.26",
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-darwin-x64": "1.0.2"
+      }
+    },
+    "node_modules/@img/sharp-libvips-darwin-arm64": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.0.2.tgz",
+      "integrity": "sha512-tcK/41Rq8IKlSaKRCCAuuY3lDJjQnYIW1UXU1kxcEKrfL8WR7N6+rzNoOxoQRJWTAECuKwgAHnPvqXGN8XfkHA==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "macos": ">=11",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-darwin-x64": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.0.2.tgz",
+      "integrity": "sha512-Ofw+7oaWa0HiiMiKWqqaZbaYV3/UGL2wAPeLuJTx+9cXpCRdvQhCLG0IH8YGwM0yGWGLpsF4Su9vM1o6aer+Fw==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "macos": ">=10.13",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-arm": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.0.2.tgz",
+      "integrity": "sha512-iLWCvrKgeFoglQxdEwzu1eQV04o8YeYGFXtfWU26Zr2wWT3q3MTzC+QTCO3ZQfWd3doKHT4Pm2kRmLbupT+sZw==",
+      "cpu": [
+        "arm"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "glibc": ">=2.28",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-arm64": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.0.2.tgz",
+      "integrity": "sha512-x7kCt3N00ofFmmkkdshwj3vGPCnmiDh7Gwnd4nUwZln2YjqPxV1NlTyZOvoDWdKQVDL911487HOueBvrpflagw==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "glibc": ">=2.26",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-s390x": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.0.2.tgz",
+      "integrity": "sha512-cmhQ1J4qVhfmS6szYW7RT+gLJq9dH2i4maq+qyXayUSn9/3iY2ZeWpbAgSpSVbV2E1JUL2Gg7pwnYQ1h8rQIog==",
+      "cpu": [
+        "s390x"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "glibc": ">=2.28",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-x64": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.0.2.tgz",
+      "integrity": "sha512-E441q4Qdb+7yuyiADVi5J+44x8ctlrqn8XgkDTwr4qPJzWkaHwD489iZ4nGDgcuya4iMN3ULV6NwbhRZJ9Z7SQ==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "glibc": ">=2.26",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linuxmusl-arm64": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.0.2.tgz",
+      "integrity": "sha512-3CAkndNpYUrlDqkCM5qhksfE+qSIREVpyoeHIU6jd48SJZViAmznoQQLAv4hVXF7xyUB9zf+G++e2v1ABjCbEQ==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "musl": ">=1.2.2",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linuxmusl-x64": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.0.2.tgz",
+      "integrity": "sha512-VI94Q6khIHqHWNOh6LLdm9s2Ry4zdjWJwH56WoiJU7NTeDwyApdZZ8c+SADC8OH98KWNQXnE01UdJ9CSfZvwZw==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "musl": ">=1.2.2",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-linux-arm": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.33.4.tgz",
+      "integrity": "sha512-RUgBD1c0+gCYZGCCe6mMdTiOFS0Zc/XrN0fYd6hISIKcDUbAW5NtSQW9g/powkrXYm6Vzwd6y+fqmExDuCdHNQ==",
+      "cpu": [
+        "arm"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "glibc": ">=2.28",
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-arm": "1.0.2"
+      }
+    },
+    "node_modules/@img/sharp-linux-arm64": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.33.4.tgz",
+      "integrity": "sha512-2800clwVg1ZQtxwSoTlHvtm9ObgAax7V6MTAB/hDT945Tfyy3hVkmiHpeLPCKYqYR1Gcmv1uDZ3a4OFwkdBL7Q==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "glibc": ">=2.26",
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-arm64": "1.0.2"
+      }
+    },
+    "node_modules/@img/sharp-linux-s390x": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.33.4.tgz",
+      "integrity": "sha512-h3RAL3siQoyzSoH36tUeS0PDmb5wINKGYzcLB5C6DIiAn2F3udeFAum+gj8IbA/82+8RGCTn7XW8WTFnqag4tQ==",
+      "cpu": [
+        "s390x"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "glibc": ">=2.31",
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-s390x": "1.0.2"
+      }
+    },
+    "node_modules/@img/sharp-linux-x64": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.33.4.tgz",
+      "integrity": "sha512-GoR++s0XW9DGVi8SUGQ/U4AeIzLdNjHka6jidVwapQ/JebGVQIpi52OdyxCNVRE++n1FCLzjDovJNozif7w/Aw==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "glibc": ">=2.26",
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-x64": "1.0.2"
+      }
+    },
+    "node_modules/@img/sharp-linuxmusl-arm64": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.33.4.tgz",
+      "integrity": "sha512-nhr1yC3BlVrKDTl6cO12gTpXMl4ITBUZieehFvMntlCXFzH2bvKG76tBL2Y/OqhupZt81pR7R+Q5YhJxW0rGgQ==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "musl": ">=1.2.2",
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linuxmusl-arm64": "1.0.2"
+      }
+    },
+    "node_modules/@img/sharp-linuxmusl-x64": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.33.4.tgz",
+      "integrity": "sha512-uCPTku0zwqDmZEOi4ILyGdmW76tH7dm8kKlOIV1XC5cLyJ71ENAAqarOHQh0RLfpIpbV5KOpXzdU6XkJtS0daw==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "musl": ">=1.2.2",
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linuxmusl-x64": "1.0.2"
+      }
+    },
+    "node_modules/@img/sharp-wasm32": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.33.4.tgz",
+      "integrity": "sha512-Bmmauh4sXUsUqkleQahpdNXKvo+wa1V9KhT2pDA4VJGKwnKMJXiSTGphn0gnJrlooda0QxCtXc6RX1XAU6hMnQ==",
+      "cpu": [
+        "wasm32"
+      ],
+      "optional": true,
+      "dependencies": {
+        "@emnapi/runtime": "^1.1.1"
+      },
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-win32-ia32": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.33.4.tgz",
+      "integrity": "sha512-99SJ91XzUhYHbx7uhK3+9Lf7+LjwMGQZMDlO/E/YVJ7Nc3lyDFZPGhjwiYdctoH2BOzW9+TnfqcaMKt0jHLdqw==",
+      "cpu": [
+        "ia32"
+      ],
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
       },
-      "peerDependencies": {
-        "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0"
+      "funding": {
+        "url": "https://opencollective.com/libvips"
       }
     },
-    "node_modules/@eslint-community/regexpp": {
-      "version": "4.6.0",
-      "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.6.0.tgz",
-      "integrity": "sha512-uiPeRISaglZnaZk8vwrjQZ1CxogZeY/4IYft6gBOTqu1WhVXWmCmZMWxUv2Q/pxSvPdp1JPaO62kLOcOkMqWrw==",
+    "node_modules/@img/sharp-win32-x64": {
+      "version": "0.33.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.33.4.tgz",
+      "integrity": "sha512-3QLocdTRVIrFNye5YocZl+KKpYKP+fksi1QhmOArgx7GyhIbQp/WrJRu176jm8IxromS7RIkzMiMINVdBtC8Aw==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "win32"
+      ],
       "engines": {
-        "node": "^12.0.0 || ^14.0.0 || >=16.0.0"
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0",
+        "npm": ">=9.6.5",
+        "pnpm": ">=7.1.0",
+        "yarn": ">=3.2.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
       }
     },
-    "node_modules/@eslint/eslintrc": {
-      "version": "2.1.0",
-      "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.1.0.tgz",
-      "integrity": "sha512-Lj7DECXqIVCqnqjjHMPna4vn6GJcMgul/wuS0je9OZ9gsL0zzDpKPVtcG1HaDVc+9y+qgXneTeUMbCqXJNpH1A==",
+    "node_modules/@isaacs/cliui": {
+      "version": "8.0.2",
+      "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz",
+      "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==",
       "dependencies": {
-        "ajv": "^6.12.4",
-        "debug": "^4.3.2",
-        "espree": "^9.6.0",
-        "globals": "^13.19.0",
-        "ignore": "^5.2.0",
-        "import-fresh": "^3.2.1",
-        "js-yaml": "^4.1.0",
-        "minimatch": "^3.1.2",
-        "strip-json-comments": "^3.1.1"
+        "string-width": "^5.1.2",
+        "string-width-cjs": "npm:string-width@^4.2.0",
+        "strip-ansi": "^7.0.1",
+        "strip-ansi-cjs": "npm:strip-ansi@^6.0.1",
+        "wrap-ansi": "^8.1.0",
+        "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0"
       },
       "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
-      },
-      "funding": {
-        "url": "https://opencollective.com/eslint"
+        "node": ">=12"
       }
     },
-    "node_modules/@eslint/js": {
-      "version": "8.44.0",
-      "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.44.0.tgz",
-      "integrity": "sha512-Ag+9YM4ocKQx9AarydN0KY2j0ErMHNIocPDrVo8zAE44xLTjEtz81OdR68/cydGtk6m6jDb5Za3r2useMzYmSw==",
+    "node_modules/@isaacs/cliui/node_modules/ansi-regex": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz",
+      "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==",
       "engines": {
-        "node": "^12.22.0 || ^14.17.0 || >=16.0.0"
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/ansi-regex?sponsor=1"
       }
     },
-    "node_modules/@humanwhocodes/config-array": {
-      "version": "0.11.10",
-      "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.10.tgz",
-      "integrity": "sha512-KVVjQmNUepDVGXNuoRRdmmEjruj0KfiGSbS8LVc12LMsWDQzRXJ0qdhN8L8uUigKpfEHRhlaQFY0ib1tnUbNeQ==",
+    "node_modules/@isaacs/cliui/node_modules/strip-ansi": {
+      "version": "7.1.0",
+      "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
+      "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==",
       "dependencies": {
-        "@humanwhocodes/object-schema": "^1.2.1",
-        "debug": "^4.1.1",
-        "minimatch": "^3.0.5"
+        "ansi-regex": "^6.0.1"
       },
       "engines": {
-        "node": ">=10.10.0"
-      }
-    },
-    "node_modules/@humanwhocodes/module-importer": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz",
-      "integrity": "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==",
-      "engines": {
-        "node": ">=12.22"
+        "node": ">=12"
       },
       "funding": {
-        "type": "github",
-        "url": "https://github.com/sponsors/nzakas"
+        "url": "https://github.com/chalk/strip-ansi?sponsor=1"
       }
     },
-    "node_modules/@humanwhocodes/object-schema": {
-      "version": "1.2.1",
-      "resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-1.2.1.tgz",
-      "integrity": "sha512-ZnQMnLV4e7hDlUvw8H+U8ASL02SS2Gn6+9Ac3wGGLIe7+je2AeAOxPY+izIPJDfFDb7eDjev0Us8MO1iFRN8hA=="
+    "node_modules/@isaacs/fs-minipass": {
+      "version": "4.0.1",
+      "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
+      "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==",
+      "dependencies": {
+        "minipass": "^7.0.4"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
     },
     "node_modules/@jridgewell/gen-mapping": {
       "version": "0.3.3",
@@ -359,6 +952,15 @@
         "node": ">= 8"
       }
     },
+    "node_modules/@pkgjs/parseargs": {
+      "version": "0.11.0",
+      "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
+      "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==",
+      "optional": true,
+      "engines": {
+        "node": ">=14"
+      }
+    },
     "node_modules/@pkgr/utils": {
       "version": "2.4.2",
       "resolved": "https://registry.npmjs.org/@pkgr/utils/-/utils-2.4.2.tgz",
@@ -557,18 +1159,6 @@
         "url": "https://opencollective.com/typescript-eslint"
       }
     },
-    "node_modules/@xenova/transformers": {
-      "version": "2.4.2",
-      "resolved": "https://registry.npmjs.org/@xenova/transformers/-/transformers-2.4.2.tgz",
-      "integrity": "sha512-m1QlvNsic/kQJ1F1N02TpYkIBPwB68hZGljO32EM4mHEw4nKlPoQ/9gZ+oUKkavKC/LqgCnmiNQ8jWfa4Zl5AQ==",
-      "dependencies": {
-        "onnxruntime-web": "1.14.0",
-        "sharp": "^0.32.0"
-      },
-      "optionalDependencies": {
-        "onnxruntime-node": "1.14.0"
-      }
-    },
     "node_modules/acorn": {
       "version": "8.10.0",
       "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.10.0.tgz",
@@ -827,35 +1417,11 @@
         "dequal": "^2.0.3"
       }
     },
-    "node_modules/b4a": {
-      "version": "1.6.4",
-      "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.4.tgz",
-      "integrity": "sha512-fpWrvyVHEKyeEvbKZTVOeZF3VSKKWtJxFIxX/jaVPf+cLbGUSitjb49pHLqPV2BUNNZ0LcoeEGfE/YCpyDYHIw=="
-    },
     "node_modules/balanced-match": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
       "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
     },
-    "node_modules/base64-js": {
-      "version": "1.5.1",
-      "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
-      "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
     "node_modules/big-integer": {
       "version": "1.6.51",
       "resolved": "https://registry.npmjs.org/big-integer/-/big-integer-1.6.51.tgz",
@@ -872,16 +1438,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/bl": {
-      "version": "4.1.0",
-      "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
-      "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==",
-      "dependencies": {
-        "buffer": "^5.5.0",
-        "inherits": "^2.0.4",
-        "readable-stream": "^3.4.0"
-      }
-    },
     "node_modules/bplist-parser": {
       "version": "0.2.0",
       "resolved": "https://registry.npmjs.org/bplist-parser/-/bplist-parser-0.2.0.tgz",
@@ -944,29 +1500,6 @@
         "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7"
       }
     },
-    "node_modules/buffer": {
-      "version": "5.7.1",
-      "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
-      "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ],
-      "dependencies": {
-        "base64-js": "^1.3.1",
-        "ieee754": "^1.1.13"
-      }
-    },
     "node_modules/bundle-name": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/bundle-name/-/bundle-name-3.0.0.tgz",
@@ -1091,11 +1624,6 @@
         "node": ">= 6"
       }
     },
-    "node_modules/chownr": {
-      "version": "1.1.4",
-      "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
-      "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg=="
-    },
     "node_modules/client-only": {
       "version": "0.0.1",
       "resolved": "https://registry.npmjs.org/client-only/-/client-only-0.0.1.tgz",
@@ -1196,28 +1724,6 @@
         }
       }
     },
-    "node_modules/decompress-response": {
-      "version": "6.0.0",
-      "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz",
-      "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
-      "dependencies": {
-        "mimic-response": "^3.1.0"
-      },
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
-    "node_modules/deep-extend": {
-      "version": "0.6.0",
-      "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz",
-      "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
-      "engines": {
-        "node": ">=4.0.0"
-      }
-    },
     "node_modules/deep-is": {
       "version": "0.1.4",
       "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz",
@@ -1290,9 +1796,9 @@
       }
     },
     "node_modules/detect-libc": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.2.tgz",
-      "integrity": "sha512-UX6sGumvvqSaXgdKGUsgZWqcUyIXZ/vZTrlRT/iobiKhGL0zL4d3osHj3uqllWJK+i+sixDS/3COVEOFbupFyw==",
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz",
+      "integrity": "sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==",
       "engines": {
         "node": ">=8"
       }
@@ -1329,6 +1835,11 @@
         "node": ">=6.0.0"
       }
     },
+    "node_modules/eastasianwidth": {
+      "version": "0.2.0",
+      "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz",
+      "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
+    },
     "node_modules/electron-to-chromium": {
       "version": "1.4.468",
       "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.468.tgz",
@@ -1339,14 +1850,6 @@
       "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz",
       "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="
     },
-    "node_modules/end-of-stream": {
-      "version": "1.4.4",
-      "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz",
-      "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==",
-      "dependencies": {
-        "once": "^1.4.0"
-      }
-    },
     "node_modules/enhanced-resolve": {
       "version": "5.15.0",
       "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.15.0.tgz",
@@ -1909,24 +2412,11 @@
         "url": "https://github.com/sindresorhus/execa?sponsor=1"
       }
     },
-    "node_modules/expand-template": {
-      "version": "2.0.3",
-      "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz",
-      "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==",
-      "engines": {
-        "node": ">=6"
-      }
-    },
     "node_modules/fast-deep-equal": {
       "version": "3.1.3",
       "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz",
       "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="
     },
-    "node_modules/fast-fifo": {
-      "version": "1.3.0",
-      "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.0.tgz",
-      "integrity": "sha512-IgfweLvEpwyA4WgiQe9Nx6VV2QkML2NkvZnk1oKnIzXgXdWxuhF7zw4DvLTPZJn6PIUneiAXPF24QmoEqHTjyw=="
-    },
     "node_modules/fast-glob": {
       "version": "3.3.1",
       "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.1.tgz",
@@ -2038,6 +2528,32 @@
         "is-callable": "^1.1.3"
       }
     },
+    "node_modules/foreground-child": {
+      "version": "3.3.0",
+      "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.0.tgz",
+      "integrity": "sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==",
+      "dependencies": {
+        "cross-spawn": "^7.0.0",
+        "signal-exit": "^4.0.1"
+      },
+      "engines": {
+        "node": ">=14"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
+    },
+    "node_modules/foreground-child/node_modules/signal-exit": {
+      "version": "4.1.0",
+      "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz",
+      "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==",
+      "engines": {
+        "node": ">=14"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
+    },
     "node_modules/fraction.js": {
       "version": "4.2.0",
       "resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.2.0.tgz",
@@ -2050,11 +2566,6 @@
         "url": "https://www.patreon.com/infusion"
       }
     },
-    "node_modules/fs-constants": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
-      "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="
-    },
     "node_modules/fs.realpath": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
@@ -2154,11 +2665,6 @@
         "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
       }
     },
-    "node_modules/github-from-package": {
-      "version": "0.0.0",
-      "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz",
-      "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw=="
-    },
     "node_modules/glob": {
       "version": "7.1.7",
       "resolved": "https://registry.npmjs.org/glob/-/glob-7.1.7.tgz",
@@ -2344,25 +2850,6 @@
         "node": ">=14.18.0"
       }
     },
-    "node_modules/ieee754": {
-      "version": "1.2.1",
-      "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
-      "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
     "node_modules/ignore": {
       "version": "5.2.4",
       "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.4.tgz",
@@ -2408,11 +2895,6 @@
       "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
       "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
     },
-    "node_modules/ini": {
-      "version": "1.3.8",
-      "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
-      "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew=="
-    },
     "node_modules/internal-slot": {
       "version": "1.0.5",
       "resolved": "https://registry.npmjs.org/internal-slot/-/internal-slot-1.0.5.tgz",
@@ -2539,6 +3021,14 @@
         "node": ">=0.10.0"
       }
     },
+    "node_modules/is-fullwidth-code-point": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
+      "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/is-glob": {
       "version": "4.0.3",
       "resolved": "https://registry.npmjs.org/is-glob/-/is-glob-4.0.3.tgz",
@@ -2733,6 +3223,20 @@
       "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
       "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="
     },
+    "node_modules/jackspeak": {
+      "version": "3.4.3",
+      "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz",
+      "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==",
+      "dependencies": {
+        "@isaacs/cliui": "^8.0.2"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      },
+      "optionalDependencies": {
+        "@pkgjs/parseargs": "^0.11.0"
+      }
+    },
     "node_modules/jiti": {
       "version": "1.19.1",
       "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.19.1.tgz",
@@ -2849,11 +3353,6 @@
       "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz",
       "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ=="
     },
-    "node_modules/long": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
-      "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA=="
-    },
     "node_modules/loose-envify": {
       "version": "1.4.0",
       "resolved": "https://registry.npmjs.org/loose-envify/-/loose-envify-1.4.0.tgz",
@@ -2866,15 +3365,9 @@
       }
     },
     "node_modules/lru-cache": {
-      "version": "6.0.0",
-      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz",
-      "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==",
-      "dependencies": {
-        "yallist": "^4.0.0"
-      },
-      "engines": {
-        "node": ">=10"
-      }
+      "version": "10.4.3",
+      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz",
+      "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="
     },
     "node_modules/merge-stream": {
       "version": "2.0.0",
@@ -2912,17 +3405,6 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/mimic-response": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
-      "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==",
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/minimatch": {
       "version": "3.1.2",
       "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
@@ -2939,13 +3421,97 @@
       "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
       "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
       "funding": {
-        "url": "https://github.com/sponsors/ljharb"
+        "url": "https://github.com/sponsors/ljharb"
+      }
+    },
+    "node_modules/minipass": {
+      "version": "7.1.2",
+      "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz",
+      "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==",
+      "engines": {
+        "node": ">=16 || 14 >=14.17"
+      }
+    },
+    "node_modules/minizlib": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz",
+      "integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==",
+      "dependencies": {
+        "minipass": "^7.0.4",
+        "rimraf": "^5.0.5"
+      },
+      "engines": {
+        "node": ">= 18"
+      }
+    },
+    "node_modules/minizlib/node_modules/brace-expansion": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
+      "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
+      "dependencies": {
+        "balanced-match": "^1.0.0"
+      }
+    },
+    "node_modules/minizlib/node_modules/glob": {
+      "version": "10.4.5",
+      "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz",
+      "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==",
+      "dependencies": {
+        "foreground-child": "^3.1.0",
+        "jackspeak": "^3.1.2",
+        "minimatch": "^9.0.4",
+        "minipass": "^7.1.2",
+        "package-json-from-dist": "^1.0.0",
+        "path-scurry": "^1.11.1"
+      },
+      "bin": {
+        "glob": "dist/esm/bin.mjs"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
+    },
+    "node_modules/minizlib/node_modules/minimatch": {
+      "version": "9.0.5",
+      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
+      "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
+      "dependencies": {
+        "brace-expansion": "^2.0.1"
+      },
+      "engines": {
+        "node": ">=16 || 14 >=14.17"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
+    },
+    "node_modules/minizlib/node_modules/rimraf": {
+      "version": "5.0.10",
+      "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz",
+      "integrity": "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==",
+      "dependencies": {
+        "glob": "^10.3.7"
+      },
+      "bin": {
+        "rimraf": "dist/esm/bin.mjs"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
       }
     },
-    "node_modules/mkdirp-classic": {
-      "version": "0.5.3",
-      "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
-      "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A=="
+    "node_modules/mkdirp": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
+      "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
+      "bin": {
+        "mkdirp": "dist/cjs/src/bin.js"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
     },
     "node_modules/ms": {
       "version": "2.1.2",
@@ -2979,11 +3545,6 @@
         "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1"
       }
     },
-    "node_modules/napi-build-utils": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-1.0.2.tgz",
-      "integrity": "sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg=="
-    },
     "node_modules/natural-compare": {
       "version": "1.4.0",
       "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz",
@@ -3038,22 +3599,6 @@
         }
       }
     },
-    "node_modules/node-abi": {
-      "version": "3.45.0",
-      "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.45.0.tgz",
-      "integrity": "sha512-iwXuFrMAcFVi/ZoZiqq8BzAdsLw9kxDfTC0HMyjXfSL/6CSDAGD5UmR7azrAgWV1zKYq7dUUMj4owusBWKLsiQ==",
-      "dependencies": {
-        "semver": "^7.3.5"
-      },
-      "engines": {
-        "node": ">=10"
-      }
-    },
-    "node_modules/node-addon-api": {
-      "version": "6.1.0",
-      "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-6.1.0.tgz",
-      "integrity": "sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA=="
-    },
     "node_modules/node-releases": {
       "version": "2.0.13",
       "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.13.tgz",
@@ -3228,46 +3773,6 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/onnx-proto": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/onnx-proto/-/onnx-proto-4.0.4.tgz",
-      "integrity": "sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA==",
-      "dependencies": {
-        "protobufjs": "^6.8.8"
-      }
-    },
-    "node_modules/onnxruntime-common": {
-      "version": "1.14.0",
-      "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.14.0.tgz",
-      "integrity": "sha512-3LJpegM2iMNRX2wUmtYfeX/ytfOzNwAWKSq1HbRrKc9+uqG/FsEA0bbKZl1btQeZaXhC26l44NWpNUeXPII7Ew=="
-    },
-    "node_modules/onnxruntime-node": {
-      "version": "1.14.0",
-      "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.14.0.tgz",
-      "integrity": "sha512-5ba7TWomIV/9b6NH/1x/8QEeowsb+jBEvFzU6z0T4mNsFwdPqXeFUM7uxC6QeSRkEbWu3qEB0VMjrvzN/0S9+w==",
-      "optional": true,
-      "os": [
-        "win32",
-        "darwin",
-        "linux"
-      ],
-      "dependencies": {
-        "onnxruntime-common": "~1.14.0"
-      }
-    },
-    "node_modules/onnxruntime-web": {
-      "version": "1.14.0",
-      "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.14.0.tgz",
-      "integrity": "sha512-Kcqf43UMfW8mCydVGcX9OMXI2VN17c0p6XvR7IPSZzBf/6lteBzXHvcEVWDPmCKuGombl997HgLqj91F11DzXw==",
-      "dependencies": {
-        "flatbuffers": "^1.12.0",
-        "guid-typescript": "^1.0.9",
-        "long": "^4.0.0",
-        "onnx-proto": "^4.0.4",
-        "onnxruntime-common": "~1.14.0",
-        "platform": "^1.3.6"
-      }
-    },
     "node_modules/open": {
       "version": "9.1.0",
       "resolved": "https://registry.npmjs.org/open/-/open-9.1.0.tgz",
@@ -3329,6 +3834,11 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/package-json-from-dist": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.0.tgz",
+      "integrity": "sha512-dATvCeZN/8wQsGywez1mzHtTlP22H8OEfPrVMLNr4/eGa+ijtLn/6M5f0dY8UKNrC2O9UCU6SSoG3qRKnt7STw=="
+    },
     "node_modules/parent-module": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz",
@@ -3369,6 +3879,21 @@
       "resolved": "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz",
       "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw=="
     },
+    "node_modules/path-scurry": {
+      "version": "1.11.1",
+      "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz",
+      "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==",
+      "dependencies": {
+        "lru-cache": "^10.2.0",
+        "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0"
+      },
+      "engines": {
+        "node": ">=16 || 14 >=14.18"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
+    },
     "node_modules/path-type": {
       "version": "4.0.0",
       "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz",
@@ -3538,57 +4063,6 @@
       "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz",
       "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ=="
     },
-    "node_modules/prebuild-install": {
-      "version": "7.1.1",
-      "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.1.tgz",
-      "integrity": "sha512-jAXscXWMcCK8GgCoHOfIr0ODh5ai8mj63L2nWrjuAgXE6tDyYGnx4/8o/rCgU+B4JSyZBKbeZqzhtwtC3ovxjw==",
-      "dependencies": {
-        "detect-libc": "^2.0.0",
-        "expand-template": "^2.0.3",
-        "github-from-package": "0.0.0",
-        "minimist": "^1.2.3",
-        "mkdirp-classic": "^0.5.3",
-        "napi-build-utils": "^1.0.1",
-        "node-abi": "^3.3.0",
-        "pump": "^3.0.0",
-        "rc": "^1.2.7",
-        "simple-get": "^4.0.0",
-        "tar-fs": "^2.0.0",
-        "tunnel-agent": "^0.6.0"
-      },
-      "bin": {
-        "prebuild-install": "bin.js"
-      },
-      "engines": {
-        "node": ">=10"
-      }
-    },
-    "node_modules/prebuild-install/node_modules/tar-fs": {
-      "version": "2.1.1",
-      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.1.tgz",
-      "integrity": "sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==",
-      "dependencies": {
-        "chownr": "^1.1.1",
-        "mkdirp-classic": "^0.5.2",
-        "pump": "^3.0.0",
-        "tar-stream": "^2.1.4"
-      }
-    },
-    "node_modules/prebuild-install/node_modules/tar-stream": {
-      "version": "2.2.0",
-      "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz",
-      "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
-      "dependencies": {
-        "bl": "^4.0.3",
-        "end-of-stream": "^1.4.1",
-        "fs-constants": "^1.0.0",
-        "inherits": "^2.0.3",
-        "readable-stream": "^3.1.1"
-      },
-      "engines": {
-        "node": ">=6"
-      }
-    },
     "node_modules/prelude-ls": {
       "version": "1.2.1",
       "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz",
@@ -3635,15 +4109,6 @@
       "resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz",
       "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q=="
     },
-    "node_modules/pump": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz",
-      "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==",
-      "dependencies": {
-        "end-of-stream": "^1.1.0",
-        "once": "^1.3.1"
-      }
-    },
     "node_modules/punycode": {
       "version": "2.3.0",
       "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
@@ -3671,33 +4136,6 @@
         }
       ]
     },
-    "node_modules/queue-tick": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz",
-      "integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag=="
-    },
-    "node_modules/rc": {
-      "version": "1.2.8",
-      "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz",
-      "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==",
-      "dependencies": {
-        "deep-extend": "^0.6.0",
-        "ini": "~1.3.0",
-        "minimist": "^1.2.0",
-        "strip-json-comments": "~2.0.1"
-      },
-      "bin": {
-        "rc": "cli.js"
-      }
-    },
-    "node_modules/rc/node_modules/strip-json-comments": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
-      "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
     "node_modules/react": {
       "version": "18.2.0",
       "resolved": "https://registry.npmjs.org/react/-/react-18.2.0.tgz",
@@ -3734,19 +4172,6 @@
         "pify": "^2.3.0"
       }
     },
-    "node_modules/readable-stream": {
-      "version": "3.6.2",
-      "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
-      "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
-      "dependencies": {
-        "inherits": "^2.0.3",
-        "string_decoder": "^1.1.1",
-        "util-deprecate": "^1.0.1"
-      },
-      "engines": {
-        "node": ">= 6"
-      }
-    },
     "node_modules/readdirp": {
       "version": "3.6.0",
       "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz",
@@ -3969,25 +4394,6 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/safe-buffer": {
-      "version": "5.2.1",
-      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
-      "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
     "node_modules/safe-regex-test": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/safe-regex-test/-/safe-regex-test-1.0.0.tgz",
@@ -4010,12 +4416,9 @@
       }
     },
     "node_modules/semver": {
-      "version": "7.5.4",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz",
-      "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==",
-      "dependencies": {
-        "lru-cache": "^6.0.0"
-      },
+      "version": "7.6.3",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",
+      "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==",
       "bin": {
         "semver": "bin/semver.js"
       },
@@ -4023,28 +4426,6 @@
         "node": ">=10"
       }
     },
-    "node_modules/sharp": {
-      "version": "0.32.6",
-      "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.32.6.tgz",
-      "integrity": "sha512-KyLTWwgcR9Oe4d9HwCwNM2l7+J0dUQwn/yf7S0EnTtb0eVS4RxO0eUSvxPtzT4F3SY+C4K6fqdv/DO27sJ/v/w==",
-      "hasInstallScript": true,
-      "dependencies": {
-        "color": "^4.2.3",
-        "detect-libc": "^2.0.2",
-        "node-addon-api": "^6.1.0",
-        "prebuild-install": "^7.1.1",
-        "semver": "^7.5.4",
-        "simple-get": "^4.0.1",
-        "tar-fs": "^3.0.4",
-        "tunnel-agent": "^0.6.0"
-      },
-      "engines": {
-        "node": ">=14.15.0"
-      },
-      "funding": {
-        "url": "https://opencollective.com/libvips"
-      }
-    },
     "node_modules/shebang-command": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
@@ -4082,49 +4463,6 @@
       "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz",
       "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ=="
     },
-    "node_modules/simple-concat": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz",
-      "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
-    "node_modules/simple-get": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz",
-      "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ],
-      "dependencies": {
-        "decompress-response": "^6.0.0",
-        "once": "^1.3.1",
-        "simple-concat": "^1.0.0"
-      }
-    },
     "node_modules/simple-swizzle": {
       "version": "0.2.2",
       "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
@@ -4157,21 +4495,64 @@
         "node": ">=10.0.0"
       }
     },
-    "node_modules/streamx": {
-      "version": "2.15.0",
-      "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.15.0.tgz",
-      "integrity": "sha512-HcxY6ncGjjklGs1xsP1aR71INYcsXFJet5CU1CHqihQ2J5nOsbd4OjgjHO42w/4QNv9gZb3BueV+Vxok5pLEXg==",
+    "node_modules/string-width": {
+      "version": "5.1.2",
+      "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz",
+      "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==",
       "dependencies": {
-        "fast-fifo": "^1.1.0",
-        "queue-tick": "^1.0.1"
+        "eastasianwidth": "^0.2.0",
+        "emoji-regex": "^9.2.2",
+        "strip-ansi": "^7.0.1"
+      },
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/string_decoder": {
-      "version": "1.3.0",
-      "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
-      "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
+    "node_modules/string-width-cjs": {
+      "name": "string-width",
+      "version": "4.2.3",
+      "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
+      "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
+      "dependencies": {
+        "emoji-regex": "^8.0.0",
+        "is-fullwidth-code-point": "^3.0.0",
+        "strip-ansi": "^6.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/string-width-cjs/node_modules/emoji-regex": {
+      "version": "8.0.0",
+      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
+      "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
+    },
+    "node_modules/string-width/node_modules/ansi-regex": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz",
+      "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==",
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/ansi-regex?sponsor=1"
+      }
+    },
+    "node_modules/string-width/node_modules/strip-ansi": {
+      "version": "7.1.0",
+      "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
+      "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==",
       "dependencies": {
-        "safe-buffer": "~5.2.0"
+        "ansi-regex": "^6.0.1"
+      },
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/strip-ansi?sponsor=1"
       }
     },
     "node_modules/string.prototype.matchall": {
@@ -4245,6 +4626,18 @@
         "node": ">=8"
       }
     },
+    "node_modules/strip-ansi-cjs": {
+      "name": "strip-ansi",
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
+      "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
+      "dependencies": {
+        "ansi-regex": "^5.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
     "node_modules/strip-bom": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz",
@@ -4418,24 +4811,36 @@
         "node": ">=6"
       }
     },
-    "node_modules/tar-fs": {
-      "version": "3.0.4",
-      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.4.tgz",
-      "integrity": "sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w==",
+    "node_modules/tar": {
+      "version": "7.4.3",
+      "resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz",
+      "integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==",
       "dependencies": {
-        "mkdirp-classic": "^0.5.2",
-        "pump": "^3.0.0",
-        "tar-stream": "^3.1.5"
+        "@isaacs/fs-minipass": "^4.0.0",
+        "chownr": "^3.0.0",
+        "minipass": "^7.1.2",
+        "minizlib": "^3.0.1",
+        "mkdirp": "^3.0.1",
+        "yallist": "^5.0.0"
+      },
+      "engines": {
+        "node": ">=18"
       }
     },
-    "node_modules/tar-stream": {
-      "version": "3.1.6",
-      "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.6.tgz",
-      "integrity": "sha512-B/UyjYwPpMBv+PaFSWAmtYjwdrlEaZQEhMIBFNC5oEG8lpiW8XjcSdmEaClj28ArfKScKHs2nshz3k2le6crsg==",
-      "dependencies": {
-        "b4a": "^1.6.4",
-        "fast-fifo": "^1.2.0",
-        "streamx": "^2.15.0"
+    "node_modules/tar/node_modules/chownr": {
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz",
+      "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==",
+      "engines": {
+        "node": ">=18"
+      }
+    },
+    "node_modules/tar/node_modules/yallist": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
+      "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==",
+      "engines": {
+        "node": ">=18"
       }
     },
     "node_modules/text-table": {
@@ -4524,17 +4929,6 @@
       "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz",
       "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg=="
     },
-    "node_modules/tunnel-agent": {
-      "version": "0.6.0",
-      "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
-      "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
-      "dependencies": {
-        "safe-buffer": "^5.0.1"
-      },
-      "engines": {
-        "node": "*"
-      }
-    },
     "node_modules/type-check": {
       "version": "0.4.0",
       "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz",
@@ -4742,16 +5136,98 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/wrap-ansi": {
+      "version": "8.1.0",
+      "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
+      "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==",
+      "dependencies": {
+        "ansi-styles": "^6.1.0",
+        "string-width": "^5.0.1",
+        "strip-ansi": "^7.0.1"
+      },
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
+      }
+    },
+    "node_modules/wrap-ansi-cjs": {
+      "name": "wrap-ansi",
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
+      "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
+      "dependencies": {
+        "ansi-styles": "^4.0.0",
+        "string-width": "^4.1.0",
+        "strip-ansi": "^6.0.0"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
+      }
+    },
+    "node_modules/wrap-ansi-cjs/node_modules/emoji-regex": {
+      "version": "8.0.0",
+      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
+      "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
+    },
+    "node_modules/wrap-ansi-cjs/node_modules/string-width": {
+      "version": "4.2.3",
+      "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
+      "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
+      "dependencies": {
+        "emoji-regex": "^8.0.0",
+        "is-fullwidth-code-point": "^3.0.0",
+        "strip-ansi": "^6.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/wrap-ansi/node_modules/ansi-regex": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz",
+      "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==",
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/ansi-regex?sponsor=1"
+      }
+    },
+    "node_modules/wrap-ansi/node_modules/ansi-styles": {
+      "version": "6.2.1",
+      "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
+      "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/ansi-styles?sponsor=1"
+      }
+    },
+    "node_modules/wrap-ansi/node_modules/strip-ansi": {
+      "version": "7.1.0",
+      "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
+      "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==",
+      "dependencies": {
+        "ansi-regex": "^6.0.1"
+      },
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/strip-ansi?sponsor=1"
+      }
+    },
     "node_modules/wrappy": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
       "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
     },
-    "node_modules/yallist": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
-      "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
-    },
     "node_modules/yaml": {
       "version": "2.3.1",
       "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.1.tgz",
diff --git a/examples/next-client/package.json b/examples/next-client/package.json
index 814c663c9..7bccaea67 100644
--- a/examples/next-client/package.json
+++ b/examples/next-client/package.json
@@ -9,7 +9,7 @@
     "lint": "next lint"
   },
   "dependencies": {
-    "@xenova/transformers": "^2.4.2",
+    "@huggingface/transformers": "^3.0.0-alpha.5",
     "autoprefixer": "10.4.14",
     "eslint": "8.45.0",
     "eslint-config-next": "13.4.12",
diff --git a/examples/next-client/src/app/worker.js b/examples/next-client/src/app/worker.js
index c7704df8a..4b9960009 100644
--- a/examples/next-client/src/app/worker.js
+++ b/examples/next-client/src/app/worker.js
@@ -1,7 +1,4 @@
-import { pipeline, env } from "@xenova/transformers";
-
-// Skip local model check
-env.allowLocalModels = false;
+import { pipeline } from "@huggingface/transformers";
 
 // Use the Singleton pattern to enable lazy construction of the pipeline.
 class PipelineSingleton {
@@ -10,9 +7,7 @@ class PipelineSingleton {
     static instance = null;
 
     static async getInstance(progress_callback = null) {
-        if (this.instance === null) {
-            this.instance = pipeline(this.task, this.model, { progress_callback });
-        }
+        this.instance ??= pipeline(this.task, this.model, { progress_callback });
         return this.instance;
     }
 }
@@ -21,14 +16,14 @@ class PipelineSingleton {
 self.addEventListener('message', async (event) => {
     // Retrieve the classification pipeline. When called for the first time,
     // this will load the pipeline and save it for future use.
-    let classifier = await PipelineSingleton.getInstance(x => {
+    const classifier = await PipelineSingleton.getInstance(x => {
         // We also add a progress callback to the pipeline so that we can
         // track model loading.
         self.postMessage(x);
     });
 
     // Actually perform the classification
-    let output = await classifier(event.data.text);
+    const output = await classifier(event.data.text);
 
     // Send the output back to the main thread
     self.postMessage({
diff --git a/examples/remove-background-client/index.html b/examples/remove-background-client/index.html
index d20f9eaba..a85cef65f 100644
--- a/examples/remove-background-client/index.html
+++ b/examples/remove-background-client/index.html
@@ -8,7 +8,7 @@
 </head>
 
 <body>
-  <h1>Background Removal w/ <a href="http://github.com/xenova/transformers.js" target="_blank">🤗 Transformers.js</a>
+  <h1>Background Removal w/ <a href="https://github.com/huggingface/transformers.js" target="_blank">🤗 Transformers.js</a>
   </h1>
   <h4>Runs locally in your browser, powered by the <a href="https://huggingface.co/briaai/RMBG-1.4" target="_blank">RMBG V1.4 model</a> from <a
       href="https://bria.ai/" target="_blank">BRIA AI</a>
diff --git a/examples/segment-anything-client/.gitignore b/examples/segment-anything-client/.gitignore
new file mode 100644
index 000000000..1521c8b76
--- /dev/null
+++ b/examples/segment-anything-client/.gitignore
@@ -0,0 +1 @@
+dist
diff --git a/examples/segment-anything-client/index.css b/examples/segment-anything-client/index.css
index a896b8846..fc556bcac 100644
--- a/examples/segment-anything-client/index.css
+++ b/examples/segment-anything-client/index.css
@@ -23,7 +23,7 @@ body,
     align-items: center;
 }
 
-h1 {
+h1, h3 {
     text-align: center;
 }
 
diff --git a/examples/segment-anything-client/index.html b/examples/segment-anything-client/index.html
index 5e8a2e9b9..9dba925fe 100644
--- a/examples/segment-anything-client/index.html
+++ b/examples/segment-anything-client/index.html
@@ -6,11 +6,13 @@
     <link rel="stylesheet" href="index.css" />
 
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Transformers.js - Segment Anything</title>
+    <title>Transformers.js - Segment Anything WebGPU</title>
 </head>
 
 <body>
-    <h1>Segment Anything w/ 🤗 Transformers.js</h1>
+    <h1>Segment Anything WebGPU</h1>
+    <h3>In-browser image segmentation w/ <a href="https://hf.co/docs/transformers.js" target="_blank">🤗
+            Transformers.js</a></h3>
     <div id="container">
         <label id="upload-button" for="upload">
             <svg width="25" height="25" viewBox="0 0 25 25" fill="none" xmlns="http://www.w3.org/2000/svg">
diff --git a/examples/segment-anything-client/index.js b/examples/segment-anything-client/index.js
index e01b59c49..979db0582 100644
--- a/examples/segment-anything-client/index.js
+++ b/examples/segment-anything-client/index.js
@@ -23,9 +23,10 @@ const BASE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/re
 const EXAMPLE_URL = BASE_URL + 'corgi.jpg';
 
 // Create a web worker so that the main (UI) thread is not blocked during inference.
-const worker = new Worker('worker.js', {
-    type: 'module',
-});
+const worker = new Worker(
+    new URL('./worker.js', import.meta.url),
+    { type: 'module' }
+);
 
 // Preload star and cross images to avoid lag on first click
 const star = new Image();
diff --git a/examples/segment-anything-client/package.json b/examples/segment-anything-client/package.json
new file mode 100644
index 000000000..aa790ea74
--- /dev/null
+++ b/examples/segment-anything-client/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "segment-anything-client",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "@huggingface/transformers": "^3.0.0-alpha.0"
+  },
+  "devDependencies": {
+    "vite": "^5.2.9"
+  }
+}
diff --git a/examples/segment-anything-client/vite.config.js b/examples/segment-anything-client/vite.config.js
new file mode 100644
index 000000000..b2d374d1b
--- /dev/null
+++ b/examples/segment-anything-client/vite.config.js
@@ -0,0 +1,18 @@
+import { defineConfig } from 'vite';
+export default defineConfig(env => {
+  const config = {
+    build: {
+      target: 'esnext'
+    }
+  };
+
+  // TODO: Add this back when .wasm files are served locally
+  // if (env.mode === 'development') {
+  //   // The .wasm files are not correctly served using Vite in development mode.
+  //   // This is a workaround to exclude the onnxruntime-web package from Vite's optimization.
+  //   // See also: https://github.com/vitejs/vite/issues/8427
+  //   config.optimizeDeps = { exclude: ["onnxruntime-web"] };
+  // }
+
+  return config;
+});
diff --git a/examples/segment-anything-client/worker.js b/examples/segment-anything-client/worker.js
index 5dd636973..bb783e0b5 100644
--- a/examples/segment-anything-client/worker.js
+++ b/examples/segment-anything-client/worker.js
@@ -1,33 +1,25 @@
-import { env, SamModel, AutoProcessor, RawImage, Tensor } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.14.0';
-
-// Since we will download the model from the Hugging Face Hub, we can skip the local model check
-env.allowLocalModels = false;
+import { SamModel, AutoProcessor, RawImage, Tensor } from '@huggingface/transformers';
 
 // We adopt the singleton pattern to enable lazy-loading of the model and processor.
 export class SegmentAnythingSingleton {
     static model_id = 'Xenova/slimsam-77-uniform';
     static model;
     static processor;
-    static quantized = true;
 
     static getInstance() {
-        if (!this.model) {
-            this.model = SamModel.from_pretrained(this.model_id, {
-                quantized: this.quantized,
-            });
-        }
-        if (!this.processor) {
-            this.processor = AutoProcessor.from_pretrained(this.model_id);
-        }
+        this.model ??= SamModel.from_pretrained(this.model_id, {
+            dtype: 'fp16',
+            device: 'webgpu',
+        });
+        this.processor ??= AutoProcessor.from_pretrained(this.model_id);
 
         return Promise.all([this.model, this.processor]);
     }
 }
 
-
 // State variables
-let image_embeddings = null;
-let image_inputs = null;
+let imageEmbeddings = null;
+let imageInputs = null;
 let ready = false;
 
 self.onmessage = async (e) => {
@@ -42,8 +34,8 @@ self.onmessage = async (e) => {
 
     const { type, data } = e.data;
     if (type === 'reset') {
-        image_inputs = null;
-        image_embeddings = null;
+        imageInputs = null;
+        imageEmbeddings = null;
 
     } else if (type === 'segment') {
         // Indicate that we are starting to segment the image
@@ -54,8 +46,8 @@ self.onmessage = async (e) => {
 
         // Read the image and recompute image embeddings
         const image = await RawImage.read(e.data.data);
-        image_inputs = await processor(image);
-        image_embeddings = await model.get_image_embeddings(image_inputs)
+        imageInputs = await processor(image);
+        imageEmbeddings = await model.get_image_embeddings(imageInputs)
 
         // Indicate that we have computed the image embeddings, and we are ready to accept decoding requests
         self.postMessage({
@@ -65,7 +57,7 @@ self.onmessage = async (e) => {
 
     } else if (type === 'decode') {
         // Prepare inputs for decoding
-        const reshaped = image_inputs.reshaped_input_sizes[0];
+        const reshaped = imageInputs.reshaped_input_sizes[0];
         const points = data.map(x => [x.point[0] * reshaped[1], x.point[1] * reshaped[0]])
         const labels = data.map(x => BigInt(x.label));
 
@@ -81,17 +73,17 @@ self.onmessage = async (e) => {
         )
 
         // Generate the mask
-        const outputs = await model({
-            ...image_embeddings,
+        const { pred_masks, iou_scores } = await model({
+            ...imageEmbeddings,
             input_points,
             input_labels,
         })
 
         // Post-process the mask
         const masks = await processor.post_process_masks(
-            outputs.pred_masks,
-            image_inputs.original_sizes,
-            image_inputs.reshaped_input_sizes,
+            pred_masks,
+            imageInputs.original_sizes,
+            imageInputs.reshaped_input_sizes,
         );
 
         // Send the result back to the main thread
@@ -99,7 +91,7 @@ self.onmessage = async (e) => {
             type: 'decode_result',
             data: {
                 mask: RawImage.fromTensor(masks[0][0]),
-                scores: outputs.iou_scores.data,
+                scores: iou_scores.data,
             },
         });
 
diff --git a/examples/text-to-speech-client/src/worker.js b/examples/text-to-speech-client/src/worker.js
index 76b8f76ef..4644890d3 100644
--- a/examples/text-to-speech-client/src/worker.js
+++ b/examples/text-to-speech-client/src/worker.js
@@ -25,14 +25,14 @@ class MyTextToSpeechPipeline {
 
         if (this.model_instance === null) {
             this.model_instance = SpeechT5ForTextToSpeech.from_pretrained(this.model_id, {
-                quantized: false,
+                dtype: 'fp32',
                 progress_callback,
             });
         }
 
         if (this.vocoder_instance === null) {
             this.vocoder_instance = SpeechT5HifiGan.from_pretrained(this.vocoder_id, {
-                quantized: false,
+                dtype: 'fp32',
                 progress_callback,
             });
         }
diff --git a/examples/tokenizer-playground/src/App.jsx b/examples/tokenizer-playground/src/App.jsx
index 1e1a286c3..1307e41c8 100644
--- a/examples/tokenizer-playground/src/App.jsx
+++ b/examples/tokenizer-playground/src/App.jsx
@@ -98,7 +98,7 @@ function App() {
 
       <div>
         <h1 className='text-5xl font-bold mb-2'>The Tokenizer Playground</h1>
-        <h2 className='text-lg font-normal'>Experiment with different tokenizers (running <a className="text-gray-900 underline" href="https://github.com/xenova/transformers.js">locally</a> in your browser).</h2>
+        <h2 className='text-lg font-normal'>Experiment with different tokenizers (running <a className="text-gray-900 underline" href="https://github.com/huggingface/transformers.js">locally</a> in your browser).</h2>
       </div>
 
       <div>
diff --git a/examples/video-object-detection/index.html b/examples/video-object-detection/index.html
index 680b1e3bf..bd731f32c 100644
--- a/examples/video-object-detection/index.html
+++ b/examples/video-object-detection/index.html
@@ -10,7 +10,7 @@
 <body>
   <h1>
     Real-time object detection w/
-    <a href="http://github.com/xenova/transformers.js" target="_blank">🤗 Transformers.js</a>
+    <a href="https://github.com/huggingface/transformers.js" target="_blank">🤗 Transformers.js</a>
   </h1>
   <h4>
     Runs locally in your browser, powered by
diff --git a/examples/video-object-detection/main.js b/examples/video-object-detection/main.js
index 5eea3aa91..12c3552a4 100644
--- a/examples/video-object-detection/main.js
+++ b/examples/video-object-detection/main.js
@@ -18,6 +18,13 @@ const thresholdSlider = document.getElementById('threshold');
 const thresholdLabel = document.getElementById('threshold-value');
 const sizeSlider = document.getElementById('size');
 const sizeLabel = document.getElementById('size-value');
+const scaleSlider = document.getElementById('scale');
+const scaleLabel = document.getElementById('scale-value');
+
+function setStreamSize(width, height) {
+    video.width = canvas.width = Math.round(width);
+    video.height = canvas.height = Math.round(height);
+}
 
 status.textContent = 'Loading model...';
 
@@ -27,6 +34,14 @@ const model = await AutoModel.from_pretrained(model_id);
 const processor = await AutoProcessor.from_pretrained(model_id);
 
 // Set up controls
+let scale = 0.5;
+scaleSlider.addEventListener('input', () => {
+    scale = Number(scaleSlider.value);
+    setStreamSize(video.videoWidth * scale, video.videoHeight * scale);
+    scaleLabel.textContent = scale;
+});
+scaleSlider.disabled = false;
+
 let threshold = 0.25;
 thresholdSlider.addEventListener('input', () => {
     threshold = Number(thresholdSlider.value);
@@ -130,8 +145,7 @@ navigator.mediaDevices.getUserMedia(
     const videoTrack = stream.getVideoTracks()[0];
     const { width, height } = videoTrack.getSettings();
 
-    canvas.width = width;
-    canvas.height = height;
+    setStreamSize(width * scale, height * scale);
 
     // Set container width and height depending on the image aspect ratio
     const ar = width / height;
diff --git a/examples/webgpu-chat/.eslintrc.cjs b/examples/webgpu-chat/.eslintrc.cjs
new file mode 100644
index 000000000..ce8fffe57
--- /dev/null
+++ b/examples/webgpu-chat/.eslintrc.cjs
@@ -0,0 +1,21 @@
+module.exports = {
+  root: true,
+  env: { browser: true, es2020: true },
+  extends: [
+    'eslint:recommended',
+    'plugin:react/recommended',
+    'plugin:react/jsx-runtime',
+    'plugin:react-hooks/recommended',
+  ],
+  ignorePatterns: ['dist', '.eslintrc.cjs'],
+  parserOptions: { ecmaVersion: 'latest', sourceType: 'module' },
+  settings: { react: { version: '18.2' } },
+  plugins: ['react-refresh'],
+  rules: {
+    'react-refresh/only-export-components': [
+      'warn',
+      { allowConstantExport: true },
+    ],
+    'react/prop-types': 'off'
+  },
+}
diff --git a/examples/webgpu-chat/.gitignore b/examples/webgpu-chat/.gitignore
new file mode 100644
index 000000000..a547bf36d
--- /dev/null
+++ b/examples/webgpu-chat/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/webgpu-chat/README.md b/examples/webgpu-chat/README.md
new file mode 100644
index 000000000..f768e33fc
--- /dev/null
+++ b/examples/webgpu-chat/README.md
@@ -0,0 +1,8 @@
+# React + Vite
+
+This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
+
+Currently, two official plugins are available:
+
+- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
+- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
diff --git a/examples/webgpu-chat/index.html b/examples/webgpu-chat/index.html
new file mode 100644
index 000000000..404c33b9a
--- /dev/null
+++ b/examples/webgpu-chat/index.html
@@ -0,0 +1,13 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Phi-3 WebGPU</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.jsx"></script>
+  </body>
+</html>
diff --git a/examples/webgpu-chat/package.json b/examples/webgpu-chat/package.json
new file mode 100644
index 000000000..34e6e95e6
--- /dev/null
+++ b/examples/webgpu-chat/package.json
@@ -0,0 +1,32 @@
+{
+  "name": "webgpu-chat",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "@xenova/transformers": "github:xenova/transformers.js#v3",
+    "dompurify": "^3.1.2",
+    "marked": "^12.0.2",
+    "react": "^18.2.0",
+    "react-dom": "^18.2.0"
+  },
+  "devDependencies": {
+    "@types/react": "^18.2.43",
+    "@types/react-dom": "^18.2.17",
+    "@vitejs/plugin-react": "^4.2.1",
+    "autoprefixer": "^10.4.19",
+    "eslint": "^8.55.0",
+    "eslint-plugin-react": "^7.33.2",
+    "eslint-plugin-react-hooks": "^4.6.0",
+    "eslint-plugin-react-refresh": "^0.4.5",
+    "postcss": "^8.4.38",
+    "tailwindcss": "^3.4.3",
+    "vite": "^5.2.11"
+  }
+}
diff --git a/examples/webgpu-chat/postcss.config.js b/examples/webgpu-chat/postcss.config.js
new file mode 100644
index 000000000..2e7af2b7f
--- /dev/null
+++ b/examples/webgpu-chat/postcss.config.js
@@ -0,0 +1,6 @@
+export default {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+}
diff --git a/examples/webgpu-chat/public/logo.png b/examples/webgpu-chat/public/logo.png
new file mode 100644
index 000000000..73ecb940a
Binary files /dev/null and b/examples/webgpu-chat/public/logo.png differ
diff --git a/examples/webgpu-chat/src/App.jsx b/examples/webgpu-chat/src/App.jsx
new file mode 100644
index 000000000..fd5e3124e
--- /dev/null
+++ b/examples/webgpu-chat/src/App.jsx
@@ -0,0 +1,282 @@
+import { useEffect, useState, useRef } from 'react';
+
+import Chat from './components/Chat';
+import ArrowRightIcon from './components/icons/ArrowRightIcon';
+import StopIcon from './components/icons/StopIcon';
+import Progress from './components/Progress';
+
+const IS_WEBGPU_AVAILABLE = !!navigator.gpu;
+const STICKY_SCROLL_THRESHOLD = 120;
+
+function App() {
+
+  // Create a reference to the worker object.
+  const worker = useRef(null);
+
+  const textareaRef = useRef(null);
+  const chatContainerRef = useRef(null);
+
+  // Model loading and progress
+  const [status, setStatus] = useState(null);
+  const [loadingMessage, setLoadingMessage] = useState('');
+  const [progressItems, setProgressItems] = useState([]);
+  const [isRunning, setIsRunning] = useState(false);
+
+  // Inputs and outputs
+  const [input, setInput] = useState('');
+  const [messages, setMessages] = useState([]);
+  const [tps, setTps] = useState(null);
+  const [numTokens, setNumTokens] = useState(null);
+
+  function onEnter(message) {
+    setMessages(prev => [
+      ...prev,
+      { "role": "user", "content": message },
+    ]);
+    setTps(null);
+    setIsRunning(true);
+    setInput('');
+  }
+
+  useEffect(() => {
+    resizeInput();
+  }, [input]);
+
+  function onInterrupt() {
+    // NOTE: We do not set isRunning to false here because the worker
+    // will send a 'complete' message when it is done.
+    worker.current.postMessage({ type: 'interrupt' });
+  }
+
+  function resizeInput() {
+    if (!textareaRef.current) return;
+
+    const target = textareaRef.current;
+    target.style.height = 'auto';
+    const newHeight = Math.min(Math.max(target.scrollHeight, 24), 200);
+    target.style.height = `${newHeight}px`;
+  }
+
+  // We use the `useEffect` hook to setup the worker as soon as the `App` component is mounted.
+  useEffect(() => {
+    if (!worker.current) {
+      // Create the worker if it does not yet exist.
+      worker.current = new Worker(new URL('./worker.js', import.meta.url), {
+        type: 'module'
+      });
+    }
+
+    // Create a callback function for messages from the worker thread.
+    const onMessageReceived = (e) => {
+      switch (e.data.status) {
+        case 'loading':
+          // Model file start load: add a new progress item to the list.
+          setStatus('loading');
+          setLoadingMessage(e.data.data);
+          break;
+
+        case 'initiate':
+          setProgressItems(prev => [...prev, e.data]);
+          break;
+
+        case 'progress':
+          // Model file progress: update one of the progress items.
+          setProgressItems(
+            prev => prev.map(item => {
+              if (item.file === e.data.file) {
+                return { ...item, ...e.data }
+              }
+              return item;
+            })
+          );
+          break;
+
+        case 'done':
+          // Model file loaded: remove the progress item from the list.
+          setProgressItems(
+            prev => prev.filter(item => item.file !== e.data.file)
+          );
+          break;
+
+        case 'ready':
+          // Pipeline ready: the worker is ready to accept messages.
+          setStatus('ready');
+          break;
+
+        case 'start': {
+          // Start generation
+          setMessages(prev => [...prev, { "role": "assistant", "content": "" }]);
+        }
+          break;
+
+        case 'update': {
+          // Generation update: update the output text.
+          // Parse messages
+          const { output, tps, numTokens } = e.data;
+          setTps(tps);
+          setNumTokens(numTokens)
+          setMessages(prev => {
+            const cloned = [...prev];
+            const last = cloned.at(-1);
+            cloned[cloned.length - 1] = { ...last, content: last.content + output };
+            return cloned;
+          });
+        }
+          break;
+
+        case 'complete':
+          // Generation complete: re-enable the "Generate" button
+          setIsRunning(false);
+          break;
+      }
+    };
+
+    // Attach the callback function as an event listener.
+    worker.current.addEventListener('message', onMessageReceived);
+
+    // Define a cleanup function for when the component is unmounted.
+    return () => {
+      worker.current.removeEventListener('message', onMessageReceived);
+    };
+  }, []);
+
+  // Send the messages to the worker thread whenever the `messages` state changes.
+  useEffect(() => {
+    if (messages.filter(x => x.role === 'user').length === 0) {
+      // No user messages yet: do nothing.
+      return;
+    }
+    if (messages.at(-1).role === 'assistant') {
+      // Do not update if the last message is from the assistant
+      return;
+    }
+    setTps(null);
+    worker.current.postMessage({ type: 'generate', data: messages });
+  }, [messages, isRunning]);
+
+  useEffect(() => {
+    if (!chatContainerRef.current) return;
+    if (isRunning) {
+      const element = chatContainerRef.current;
+      if (element.scrollHeight - element.scrollTop - element.clientHeight < STICKY_SCROLL_THRESHOLD) {
+        element.scrollTop = element.scrollHeight;
+      }
+    }
+  }, [messages, isRunning]);
+
+  return (
+    IS_WEBGPU_AVAILABLE
+      ? (<div className="flex flex-col h-screen mx-auto items justify-end text-gray-800 dark:text-gray-200 bg-white dark:bg-gray-900">
+
+        {status === null && messages.length === 0 && (
+          <div className="h-full overflow-auto scrollbar-thin flex justify-center items-center flex-col relative">
+            <div className="flex flex-col items-center mb-1 max-w-[250px] text-center">
+              <img src="logo.png" width="100%" height="auto" className="block"></img>
+              <h1 className="text-4xl font-bold mb-1">Phi-3 WebGPU</h1>
+              <h2 className="font-semibold">A private and powerful AI chatbot that runs locally in your browser.</h2>
+            </div>
+
+            <div className="flex flex-col items-center px-4">
+              <p className="max-w-[514px] mb-4">
+                <br />
+                You are about to load <a href="https://huggingface.co/Xenova/Phi-3-mini-4k-instruct" target="_blank" rel="noreferrer" className="font-medium underline">Phi-3-mini-4k-instruct</a>,
+                a 3.82 billion parameter LLM that is optimized for inference on the web. Once downloaded, the model (2.3&nbsp;GB) will be cached and reused when you revisit the page.<br />
+                <br />
+                Everything runs directly in your browser using <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noreferrer" className="underline">🤗&nbsp;Transformers.js</a> and ONNX Runtime Web, meaning your conversations aren&#39;t sent to a server. You can even disconnect from the internet after the model has loaded!
+              </p>
+
+              <button
+                className="border px-4 py-2 rounded-lg bg-blue-400 text-white hover:bg-blue-500 disabled:bg-blue-100 disabled:cursor-not-allowed select-none"
+                onClick={() => {
+                  worker.current.postMessage({ type: 'load' });
+                  setStatus('loading');
+                }}
+                disabled={status !== null}
+              >
+                Load model
+              </button>
+            </div>
+          </div>
+        )}
+        {status === 'loading' && (<>
+          <div className="w-full max-w-[500px] text-left mx-auto p-4 bottom-0 mt-auto">
+            <p className="text-center mb-1">{loadingMessage}</p>
+            {progressItems.map(({ file, progress, total }, i) => (
+              <Progress key={i} text={file} percentage={progress} total={total} />
+            ))}
+          </div>
+        </>)}
+
+        {status === 'ready' && (<div
+          ref={chatContainerRef}
+          className="overflow-y-auto scrollbar-thin w-full flex flex-col items-center h-full"
+        >
+          <Chat messages={messages} />
+          <p className="text-center text-sm min-h-6 text-gray-500 dark:text-gray-300">
+            {tps && messages.length > 0 && (<>
+              {!isRunning &&
+                <span>Generated {numTokens} tokens in {(numTokens / tps).toFixed(2)} seconds&nbsp;&#40;</span>}
+              {<>
+                <span className="font-medium text-center mr-1 text-black dark:text-white">
+                  {tps.toFixed(2)}
+                </span>
+                <span className="text-gray-500 dark:text-gray-300">tokens/second</span>
+              </>}
+              {!isRunning && <>
+                <span className="mr-1">&#41;.</span>
+                <span className="underline cursor-pointer" onClick={() => {
+                  worker.current.postMessage({ type: 'reset' });
+                  setMessages([]);
+                }}>Reset</span>
+              </>}
+            </>)}
+          </p>
+        </div>)}
+
+        <div className="mt-2 border dark:bg-gray-700 rounded-lg w-[600px] max-w-[80%] max-h-[200px] mx-auto relative mb-3 flex">
+          <textarea
+            ref={textareaRef}
+            className="scrollbar-thin w-[550px] dark:bg-gray-700 px-3 py-4 rounded-lg bg-transparent border-none outline-none text-gray-800 disabled:text-gray-400 dark:text-gray-200 placeholder-gray-500 dark:placeholder-gray-400 disabled:placeholder-gray-200 resize-none disabled:cursor-not-allowed"
+            placeholder="Type your message..."
+            type="text"
+            rows={1}
+            value={input}
+            disabled={status !== 'ready'}
+            title={status === 'ready' ? "Model is ready" : "Model not loaded yet"}
+            onKeyDown={(e) => {
+              if (input.length > 0 && !isRunning && (e.key === "Enter" && !e.shiftKey)) {
+                e.preventDefault(); // Prevent default behavior of Enter key
+                onEnter(input);
+              }
+            }}
+            onInput={(e) => setInput(e.target.value)}
+          />
+          {isRunning
+            ? (<div className="cursor-pointer" onClick={onInterrupt}>
+              <StopIcon
+                className="h-8 w-8 p-1 rounded-md text-gray-800 dark:text-gray-100 absolute right-3 bottom-3"
+              />
+            </div>)
+            : input.length > 0
+              ? (<div className="cursor-pointer" onClick={() => onEnter(input)}>
+                <ArrowRightIcon
+                  className={`h-8 w-8 p-1 bg-gray-800 dark:bg-gray-100 text-white dark:text-black rounded-md absolute right-3 bottom-3`}
+                />
+              </div>)
+              : (<div>
+                <ArrowRightIcon
+                  className={`h-8 w-8 p-1 bg-gray-200 dark:bg-gray-600 text-gray-50 dark:text-gray-800 rounded-md absolute right-3 bottom-3`}
+                />
+              </div>)
+          }
+        </div>
+
+        <p className="text-xs text-gray-400 text-center mb-3">
+          Disclaimer: Generated content may be inaccurate or false.
+        </p>
+      </div>)
+      : (<div className="fixed w-screen h-screen bg-black z-10 bg-opacity-[92%] text-white text-2xl font-semibold flex justify-center items-center text-center">WebGPU is not supported<br />by this browser :&#40;</div>)
+  )
+}
+
+export default App
diff --git a/examples/webgpu-chat/src/components/Chat.css b/examples/webgpu-chat/src/components/Chat.css
new file mode 100644
index 000000000..f8ab98d4b
--- /dev/null
+++ b/examples/webgpu-chat/src/components/Chat.css
@@ -0,0 +1,112 @@
+@scope (.markdown) {
+
+    /* Code blocks */
+    pre {
+        margin: 0.5rem 0;
+        white-space: break-spaces;
+    }
+
+    code {
+        padding: 0.2em 0.4em;
+        border-radius: 4px;
+        font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace;
+        font-size: 0.9em;
+    }
+
+    pre,
+    code {
+        background-color: #f2f2f2;
+    }
+
+    @media (prefers-color-scheme: dark) {
+
+        pre,
+        code {
+            background-color: #333;
+        }
+
+    }
+
+    pre:has(code) {
+        padding: 1rem 0.5rem;
+    }
+
+    pre>code {
+        padding: 0;
+    }
+
+    /* Headings */
+    h1,
+    h2,
+    h3,
+    h4,
+    h5,
+    h6 {
+        font-weight: 600;
+        line-height: 1.2;
+    }
+
+    h1 {
+        font-size: 2em;
+        margin: 1rem 0;
+    }
+
+    h2 {
+        font-size: 1.5em;
+        margin: 0.83rem 0;
+    }
+
+    h3 {
+        font-size: 1.25em;
+        margin: 0.67rem 0;
+    }
+
+    h4 {
+        font-size: 1em;
+        margin: 0.5rem 0;
+    }
+
+    h5 {
+        font-size: 0.875em;
+        margin: 0.33rem 0;
+    }
+
+    h6 {
+        font-size: 0.75em;
+        margin: 0.25rem 0;
+    }
+
+    h1,
+    h2,
+    h3,
+    h4,
+    h5,
+    h6:first-child {
+        margin-top: 0;
+    }
+
+    /* Unordered List */
+    ul {
+        list-style-type: disc;
+        margin-left: 1.5rem;
+    }
+
+    /* Ordered List */
+    ol {
+        list-style-type: decimal;
+        margin-left: 1.5rem;
+    }
+
+    /* List Items */
+    li {
+        margin: 0.25rem 0;
+    }
+
+    p:not(:first-child) {
+        margin-top: 0.75rem;
+    }
+
+    p:not(:last-child) {
+        margin-bottom: 0.75rem;
+    }
+}
\ No newline at end of file
diff --git a/examples/webgpu-chat/src/components/Chat.jsx b/examples/webgpu-chat/src/components/Chat.jsx
new file mode 100644
index 000000000..2fe7442bf
--- /dev/null
+++ b/examples/webgpu-chat/src/components/Chat.jsx
@@ -0,0 +1,42 @@
+import { marked } from 'marked';
+import DOMPurify from 'dompurify';
+
+import BotIcon from './icons/BotIcon';
+import UserIcon from './icons/UserIcon';
+
+import './Chat.css';
+
+export default function Chat({ messages }) {
+    const empty = messages.length === 0;
+
+    return (<div className={`flex-1 p-6 max-w-[960px] w-full ${empty ? 'flex flex-col items-center justify-end' : 'space-y-4'}`}>
+        {empty
+            ? <div className="text-xl">Ready!</div>
+            : messages.map((msg, i) => (
+                <div key={`message-${i}`} className="flex items-start space-x-4">
+                    {msg.role === 'assistant'
+                        ? (<>
+                            <BotIcon className="h-6 w-6 min-h-6 min-w-6 my-3 text-gray-500 dark:text-gray-300" />
+                            <div className="bg-gray-200 dark:bg-gray-700 rounded-lg p-4">
+                                <p className="min-h-6 text-gray-800 dark:text-gray-200 overflow-wrap-anywhere">{
+                                    msg.content.length > 0
+                                        ? <span className="markdown" dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(marked.parse(msg.content)) }} />
+                                        : (<span className="h-6 flex items-center gap-1">
+                                            <span className="w-2.5 h-2.5 bg-gray-600 dark:bg-gray-300 rounded-full animate-pulse"></span>
+                                            <span className="w-2.5 h-2.5 bg-gray-600 dark:bg-gray-300 rounded-full animate-pulse animation-delay-200"></span>
+                                            <span className="w-2.5 h-2.5 bg-gray-600 dark:bg-gray-300 rounded-full animate-pulse animation-delay-400"></span>
+                                        </span>)
+                                }</p>
+                            </div>
+                        </>
+                        ) : (<>
+                            <UserIcon className="h-6 w-6 min-h-6 min-w-6 my-3 text-gray-500 dark:text-gray-300" />
+                            <div className="bg-blue-500 text-white rounded-lg p-4">
+                                <p className="min-h-6 overflow-wrap-anywhere">{msg.content}</p>
+                            </div>
+                        </>)
+                    }
+                </div>
+            ))}
+    </div>)
+}
diff --git a/examples/webgpu-chat/src/components/Progress.jsx b/examples/webgpu-chat/src/components/Progress.jsx
new file mode 100644
index 000000000..9ce024cc8
--- /dev/null
+++ b/examples/webgpu-chat/src/components/Progress.jsx
@@ -0,0 +1,15 @@
+function formatBytes(size) {
+    const i = size == 0 ? 0 : Math.floor(Math.log(size) / Math.log(1024));
+    return +((size / Math.pow(1024, i)).toFixed(2)) * 1 + ['B', 'kB', 'MB', 'GB', 'TB'][i];
+}
+
+export default function Progress({ text, percentage, total }) {
+    percentage ??= 0;
+    return (
+        <div className="w-full bg-gray-100 dark:bg-gray-700 text-left rounded-lg overflow-hidden mb-0.5">
+            <div className="bg-blue-400 whitespace-nowrap px-1 text-sm" style={{ width: `${percentage}%` }}>
+                {text} ({percentage.toFixed(2)}%{isNaN(total) ? '' : ` of ${formatBytes(total)}`})
+            </div>
+        </div>
+    );
+}
diff --git a/examples/webgpu-chat/src/components/icons/ArrowRightIcon.jsx b/examples/webgpu-chat/src/components/icons/ArrowRightIcon.jsx
new file mode 100644
index 000000000..0ca5ed917
--- /dev/null
+++ b/examples/webgpu-chat/src/components/icons/ArrowRightIcon.jsx
@@ -0,0 +1,19 @@
+export default function ArrowRightIcon(props) {
+    return (
+        <svg
+            {...props}
+            xmlns="http://www.w3.org/2000/svg"
+            width="24"
+            height="24"
+            viewBox="0 0 24 24"
+            fill="none"
+            stroke="currentColor"
+            strokeWidth="2"
+            strokeLinecap="round"
+            strokeLinejoin="round"
+        >
+            <path d="M5 12h14" />
+            <path d="m12 5 7 7-7 7" />
+        </svg>
+    )
+}
\ No newline at end of file
diff --git a/examples/webgpu-chat/src/components/icons/BotIcon.jsx b/examples/webgpu-chat/src/components/icons/BotIcon.jsx
new file mode 100644
index 000000000..b8bd0ceae
--- /dev/null
+++ b/examples/webgpu-chat/src/components/icons/BotIcon.jsx
@@ -0,0 +1,23 @@
+export default function BotIcon(props) {
+    return (
+        <svg
+            {...props}
+            xmlns="http://www.w3.org/2000/svg"
+            width="24"
+            height="24"
+            viewBox="0 0 24 24"
+            fill="none"
+            stroke="currentColor"
+            strokeWidth="2"
+            strokeLinecap="round"
+            strokeLinejoin="round"
+        >
+            <path d="M12 8V4H8" />
+            <rect width="16" height="12" x="4" y="8" rx="2" />
+            <path d="M2 14h2" />
+            <path d="M20 14h2" />
+            <path d="M15 13v2" />
+            <path d="M9 13v2" />
+        </svg>
+    )
+}
\ No newline at end of file
diff --git a/examples/webgpu-chat/src/components/icons/StopIcon.jsx b/examples/webgpu-chat/src/components/icons/StopIcon.jsx
new file mode 100644
index 000000000..9b97f3723
--- /dev/null
+++ b/examples/webgpu-chat/src/components/icons/StopIcon.jsx
@@ -0,0 +1,19 @@
+export default function StopIcon(props) {
+    return (
+        <svg
+            {...props}
+            xmlns="http://www.w3.org/2000/svg"
+            width="24"
+            height="24"
+            viewBox="0 0 24 24"
+            fill="none"
+            stroke="currentColor"
+            strokeWidth="2"
+            strokeLinecap="round"
+            strokeLinejoin="round"
+        >
+            <path d="M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />
+            <path fill="currentColor" d="M9 9.563C9 9.252 9.252 9 9.563 9h4.874c.311 0 .563.252.563.563v4.874c0 .311-.252.563-.563.563H9.564A.562.562 0 0 1 9 14.437V9.564Z" />
+        </svg>
+    )
+}
\ No newline at end of file
diff --git a/examples/webgpu-chat/src/components/icons/UserIcon.jsx b/examples/webgpu-chat/src/components/icons/UserIcon.jsx
new file mode 100644
index 000000000..cb09e7574
--- /dev/null
+++ b/examples/webgpu-chat/src/components/icons/UserIcon.jsx
@@ -0,0 +1,19 @@
+export default function UserIcon(props) {
+    return (
+        <svg
+            {...props}
+            xmlns="http://www.w3.org/2000/svg"
+            width="24"
+            height="24"
+            viewBox="0 0 24 24"
+            fill="none"
+            stroke="currentColor"
+            strokeWidth="2"
+            strokeLinecap="round"
+            strokeLinejoin="round"
+        >
+            <path d="M19 21v-2a4 4 0 0 0-4-4H9a4 4 0 0 0-4 4v2" />
+            <circle cx="12" cy="7" r="4" />
+        </svg>
+    )
+}
\ No newline at end of file
diff --git a/examples/webgpu-chat/src/index.css b/examples/webgpu-chat/src/index.css
new file mode 100644
index 000000000..8848bbd6d
--- /dev/null
+++ b/examples/webgpu-chat/src/index.css
@@ -0,0 +1,32 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+
+@layer utilities {
+  .scrollbar-thin::-webkit-scrollbar {
+    @apply w-2;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-track {
+    @apply rounded-full bg-gray-100 dark:bg-gray-700;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-thumb {
+    @apply rounded-full bg-gray-300 dark:bg-gray-600;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-thumb:hover {
+    @apply bg-gray-500;
+  }
+
+  .animation-delay-200 {
+    animation-delay: 200ms;
+  }
+  .animation-delay-400 {
+    animation-delay: 400ms;
+  }
+
+  .overflow-wrap-anywhere {
+    overflow-wrap: anywhere;
+  }
+}
diff --git a/examples/webgpu-chat/src/main.jsx b/examples/webgpu-chat/src/main.jsx
new file mode 100644
index 000000000..54b39dd1d
--- /dev/null
+++ b/examples/webgpu-chat/src/main.jsx
@@ -0,0 +1,10 @@
+import React from 'react'
+import ReactDOM from 'react-dom/client'
+import App from './App.jsx'
+import './index.css'
+
+ReactDOM.createRoot(document.getElementById('root')).render(
+  <React.StrictMode>
+    <App />
+  </React.StrictMode>,
+)
diff --git a/examples/webgpu-chat/src/worker.js b/examples/webgpu-chat/src/worker.js
new file mode 100644
index 000000000..65d679670
--- /dev/null
+++ b/examples/webgpu-chat/src/worker.js
@@ -0,0 +1,174 @@
+
+import {
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    TextStreamer,
+    StoppingCriteria,
+} from '@xenova/transformers';
+
+
+class CallbackTextStreamer extends TextStreamer {
+    constructor(tokenizer, cb) {
+        super(tokenizer, {
+            skip_prompt: true,
+            skip_special_tokens: true,
+        });
+        this.cb = cb;
+    }
+
+    on_finalized_text(text) {
+        this.cb(text);
+    }
+}
+
+class InterruptableStoppingCriteria extends StoppingCriteria {
+    constructor() {
+        super();
+        this.interrupted = false;
+    }
+
+    interrupt() {
+        this.interrupted = true;
+    }
+
+    reset() {
+        this.interrupted = false;
+    }
+
+    _call(input_ids, scores) {
+        return new Array(input_ids.length).fill(this.interrupted);
+    }
+}
+
+const stopping_criteria = new InterruptableStoppingCriteria();
+
+async function hasFp16() {
+    try {
+        const adapter = await navigator.gpu.requestAdapter();
+        return adapter.features.has('shader-f16');
+    } catch (e) {
+        return false;
+    }
+}
+
+/**
+ * This class uses the Singleton pattern to ensure that only one instance of the model is loaded.
+ */
+class TextGenerationPipeline {
+    static model_id = null;
+    static model = null;
+    static tokenizer = null;
+    static streamer = null;
+
+    static async getInstance(progress_callback = null) {
+        // Choose the model based on whether fp16 is available
+        this.model_id ??= (await hasFp16())
+            ? 'Xenova/Phi-3-mini-4k-instruct_fp16'
+            : 'Xenova/Phi-3-mini-4k-instruct';
+
+        this.tokenizer ??= AutoTokenizer.from_pretrained(this.model_id, {
+            legacy: true,
+            progress_callback,
+        });
+
+        this.model ??= AutoModelForCausalLM.from_pretrained(this.model_id, {
+            dtype: 'q4',
+            device: 'webgpu',
+            use_external_data_format: true,
+            progress_callback,
+        });
+
+        return Promise.all([this.tokenizer, this.model]);
+    }
+}
+
+async function generate(messages) {
+    // Retrieve the text-generation pipeline.
+    const [tokenizer, model] = await TextGenerationPipeline.getInstance();
+
+    const inputs = tokenizer.apply_chat_template(messages, {
+        add_generation_prompt: true,
+        return_dict: true,
+    });
+
+    let startTime;
+    let numTokens = 0;
+    const cb = (output) => {
+        startTime ??= performance.now();
+
+        let tps;
+        if (numTokens++ > 0) {
+            tps = numTokens / (performance.now() - startTime) * 1000;
+        }
+        self.postMessage({
+            status: 'update',
+            output, tps, numTokens,
+        });
+    }
+
+    const streamer = new CallbackTextStreamer(tokenizer, cb);
+
+    // Tell the main thread we are starting
+    self.postMessage({ status: 'start' });
+
+    const outputs = await model.generate({
+        ...inputs,
+        max_new_tokens: 512,
+        streamer,
+        stopping_criteria,
+    });
+    const outputText = tokenizer.batch_decode(outputs, { skip_special_tokens: false });
+
+    // Send the output back to the main thread
+    self.postMessage({
+        status: 'complete',
+        output: outputText,
+    });
+}
+
+async function load() {
+    self.postMessage({
+        status: 'loading',
+        data: 'Loading model...'
+    });
+
+    // Load the pipeline and save it for future use.
+    const [tokenizer, model] = await TextGenerationPipeline.getInstance(x => {
+        // We also add a progress callback to the pipeline so that we can
+        // track model loading.
+        self.postMessage(x);
+    });
+
+    self.postMessage({
+        status: 'loading',
+        data: 'Compiling shaders and warming up model...'
+    });
+
+    // Run model with dummy input to compile shaders
+    const inputs = tokenizer('a');
+    await model.generate({ ...inputs, max_new_tokens: 1 });
+    self.postMessage({ status: 'ready' });
+}
+// Listen for messages from the main thread
+self.addEventListener('message', async (e) => {
+    const { type, data } = e.data;
+
+    switch (type) {
+        case 'load':
+            load();
+            break;
+
+        case 'generate':
+            stopping_criteria.reset();
+            generate(data);
+            break;
+
+        case 'interrupt':
+            stopping_criteria.interrupt();
+            break;
+
+        case 'reset':
+            stopping_criteria.reset();
+            break;
+    }
+});
diff --git a/examples/webgpu-chat/tailwind.config.js b/examples/webgpu-chat/tailwind.config.js
new file mode 100644
index 000000000..d37737fc0
--- /dev/null
+++ b/examples/webgpu-chat/tailwind.config.js
@@ -0,0 +1,12 @@
+/** @type {import('tailwindcss').Config} */
+export default {
+  content: [
+    "./index.html",
+    "./src/**/*.{js,ts,jsx,tsx}",
+  ],
+  theme: {
+    extend: {},
+  },
+  plugins: [],
+}
+
diff --git a/examples/webgpu-chat/vite.config.js b/examples/webgpu-chat/vite.config.js
new file mode 100644
index 000000000..5a33944a9
--- /dev/null
+++ b/examples/webgpu-chat/vite.config.js
@@ -0,0 +1,7 @@
+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+
+// https://vitejs.dev/config/
+export default defineConfig({
+  plugins: [react()],
+})
diff --git a/examples/webgpu-clip/.gitignore b/examples/webgpu-clip/.gitignore
new file mode 100644
index 000000000..a547bf36d
--- /dev/null
+++ b/examples/webgpu-clip/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/webgpu-clip/index.html b/examples/webgpu-clip/index.html
new file mode 100644
index 000000000..4a87dacb4
--- /dev/null
+++ b/examples/webgpu-clip/index.html
@@ -0,0 +1,39 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>Transformers.js | real-time CLIP</title>
+</head>
+
+<body>
+  <h1>
+    Real-time zero-shot image classification (WebGPU)
+  </h1>
+  <h3>
+    Runs locally in your browser w/
+    <a href="https://github.com/huggingface/transformers.js" target="_blank">🤗 Transformers.js</a>
+  </h3>
+  <div id="container">
+    <video id="video" autoplay muted playsinline></video>
+    <div id="overlay"></div>
+  </div>
+  <div id="controls">
+    <div title="Labels used to perform zero-shot image classification">
+      <label>Labels (comma-separated)</label>
+      <br>
+      <input id="labels" type="text" disabled>
+    </div>
+    <div title="Template used to perform zero-shot image classification">
+      <label>Hypothesis template</label>
+      <br>
+      <input id="template" type="text" value="A photo of a {}" disabled>
+    </div>
+  </div>
+  <label id="status"></label>
+
+  <script type="module" src="/main.js"></script>
+</body>
+
+</html>
\ No newline at end of file
diff --git a/examples/webgpu-clip/main.js b/examples/webgpu-clip/main.js
new file mode 100644
index 000000000..35f17512a
--- /dev/null
+++ b/examples/webgpu-clip/main.js
@@ -0,0 +1,169 @@
+
+import {
+    AutoTokenizer,
+    CLIPTextModelWithProjection,
+    AutoProcessor,
+    CLIPVisionModelWithProjection,
+    RawImage,
+    dot,
+    softmax,
+} from '@xenova/transformers';
+
+import './style.css';
+
+// Reference the elements that we will need
+const status = document.getElementById('status');
+const container = document.getElementById('container');
+const video = document.getElementById('video');
+const labelsInput = document.getElementById('labels');
+const templateInput = document.getElementById('template');
+const overlay = document.getElementById('overlay');
+
+status.textContent = 'Loading model (300MB)...';
+
+// Use fp16 if available, otherwise use fp32
+async function hasFp16() {
+    try {
+        const adapter = await navigator.gpu.requestAdapter();
+        return adapter.features.has('shader-f16');
+    } catch (e) {
+        return false;
+    }
+}
+const dtype = (await hasFp16()) ? 'fp16' : 'fp32';
+
+// Load object detection pipeline
+const model_id = 'Xenova/clip-vit-base-patch16';
+let tokenizer, text_model, processor, vision_model;
+try {
+    // Load tokenizer and text model
+    tokenizer = await AutoTokenizer.from_pretrained(model_id);
+    text_model = await CLIPTextModelWithProjection.from_pretrained(model_id, {
+        device: 'webgpu',
+        dtype,
+    });
+
+    // Load processor and vision model
+    processor = await AutoProcessor.from_pretrained(model_id);
+    vision_model = await CLIPVisionModelWithProjection.from_pretrained(model_id, {
+        device: 'webgpu',
+        dtype,
+    });
+
+} catch (err) {
+    status.textContent = err.message;
+    alert(err.message)
+    throw err;
+}
+
+labelsInput.disabled = false;
+templateInput.disabled = false;
+
+status.textContent = 'Ready';
+
+// See `model.logit_scale` parameter of original model
+const exp_logit_scale = Math.exp(4.6052);
+
+const IMAGE_SIZE = 224;
+const canvas = document.createElement('canvas');
+canvas.width = canvas.height = IMAGE_SIZE;
+const context = canvas.getContext('2d', { willReadFrequently: true });
+
+let isProcessing = false;
+let previousTime;
+let textEmbeddings;
+let prevTextInputs;
+let prevTemplate;
+let labels;
+
+function onFrameUpdate() {
+    if (!isProcessing) {
+        isProcessing = true;
+        (async function () {
+
+            // If text inputs have changed, update the embeddings
+            if (prevTextInputs !== labelsInput.value || prevTemplate !== templateInput.value) {
+                textEmbeddings = null;
+                prevTextInputs = labelsInput.value;
+                prevTemplate = templateInput.value;
+                labels = prevTextInputs.split(/\s*,\s*/).filter(x => x);
+
+                if (labels.length > 0) {
+                    const texts = labels.map(x => templateInput.value.replaceAll('{}', x));
+
+                    const text_inputs = tokenizer(texts, { padding: true, truncation: true });
+
+                    // Compute embeddings
+                    const { text_embeds } = await text_model(text_inputs);
+                    textEmbeddings = text_embeds.normalize().tolist();
+                } else {
+                    overlay.innerHTML = '';
+                }
+            }
+
+            if (textEmbeddings) {
+                // Read the current frame from the video
+                context.drawImage(video, 0, 0, IMAGE_SIZE, IMAGE_SIZE);
+                const pixelData = context.getImageData(0, 0, IMAGE_SIZE, IMAGE_SIZE).data;
+                const image = new RawImage(pixelData, IMAGE_SIZE, IMAGE_SIZE, 4);
+
+                const image_inputs = await processor(image);
+
+                // Compute embeddings
+                const { image_embeds } = await vision_model(image_inputs);
+                const imageEmbedding = image_embeds.normalize().tolist()[0];
+
+                // Compute similarity
+                const similarities = textEmbeddings.map(
+                    x => dot(x, imageEmbedding) * exp_logit_scale
+                );
+
+                const sortedIndices = softmax(similarities)
+                    .map((x, i) => [x, i])
+                    .sort((a, b) => b[0] - a[0]);
+
+                // Update UI
+                overlay.innerHTML = '';
+                for (const [score, index] of sortedIndices) {
+                    overlay.appendChild(document.createTextNode(`${labels[index]}: ${score.toFixed(2)}`));
+                    overlay.appendChild(document.createElement('br'));
+                }
+            }
+
+            if (previousTime !== undefined) {
+                const fps = 1000 / (performance.now() - previousTime);
+                status.textContent = `FPS: ${fps.toFixed(2)}`;
+            }
+            previousTime = performance.now();
+            isProcessing = false;
+        })();
+    }
+
+    window.requestAnimationFrame(onFrameUpdate);
+}
+
+// Start the video stream
+navigator.mediaDevices.getUserMedia(
+    { video: true }, // Ask for video
+).then((stream) => {
+    // Set up the video and canvas elements.
+    video.srcObject = stream;
+    video.play();
+
+    const videoTrack = stream.getVideoTracks()[0];
+    const { width, height } = videoTrack.getSettings();
+
+    video.width = width;
+    video.height = height;
+
+    // Set container width and height depending on the image aspect ratio
+    const ar = width / height;
+    const [cw, ch] = (ar > 720 / 405) ? [720, 720 / ar] : [405 * ar, 405];
+    container.style.width = `${cw}px`;
+    container.style.height = `${ch}px`;
+
+    // Start the animation loop
+    window.requestAnimationFrame(onFrameUpdate);
+}).catch((error) => {
+    alert(error);
+});
diff --git a/examples/webgpu-clip/package.json b/examples/webgpu-clip/package.json
new file mode 100644
index 000000000..44888248a
--- /dev/null
+++ b/examples/webgpu-clip/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "webgpu-clip",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "devDependencies": {
+    "vite": "^5.2.10"
+  },
+  "dependencies": {
+    "@xenova/transformers": "^3.0.0"
+  }
+}
diff --git a/examples/webgpu-clip/style.css b/examples/webgpu-clip/style.css
new file mode 100644
index 000000000..e08c41d1a
--- /dev/null
+++ b/examples/webgpu-clip/style.css
@@ -0,0 +1,91 @@
+* {
+  box-sizing: border-box;
+  padding: 0;
+  margin: 0;
+  font-family: sans-serif;
+}
+
+html,
+body {
+  height: 100%;
+}
+
+body {
+  padding: 16px 32px;
+}
+
+body,
+#container {
+  display: flex;
+  flex-direction: column;
+  justify-content: center;
+  align-items: center;
+}
+
+#controls {
+  display: flex;
+  padding: 1rem;
+  gap: 1rem;
+}
+
+#controls>div {
+  text-align: center;
+}
+
+h1,
+h3 {
+  text-align: center;
+}
+
+h3 {
+  margin-top: 0.5rem;
+}
+
+#container {
+  position: relative;
+  width: 720px;
+  height: 405px;
+  max-width: 100%;
+  max-height: 100%;
+  border: 2px dashed #D1D5DB;
+  border-radius: 0.75rem;
+  overflow: hidden;
+  margin-top: 1rem;
+  background-size: 100% 100%;
+  background-position: center;
+  background-repeat: no-repeat;
+}
+
+#status {
+  min-height: 16px;
+  margin: 8px 0;
+}
+
+video {
+  width: 100%;
+  height: 100%;
+}
+
+input[type="text"] {
+  padding: 0.25rem 0.5rem;
+  border: 1px solid #D1D5DB;
+  border-radius: 0.25rem;
+  margin-top: 2px;
+}
+
+input[type="range"] {
+  margin-top: 6px;
+}
+
+#overlay {
+  position: absolute;
+  top: 0;
+  left: 0;
+  background-color: rgba(255, 255, 255, 0.9);
+  font-size: 1.25rem;
+  border-radius: 2px;
+}
+
+#overlay:not(:empty) {
+  padding: 0.5rem;
+}
\ No newline at end of file
diff --git a/examples/webgpu-clip/vite.config.js b/examples/webgpu-clip/vite.config.js
new file mode 100644
index 000000000..6c32f52df
--- /dev/null
+++ b/examples/webgpu-clip/vite.config.js
@@ -0,0 +1,6 @@
+import { defineConfig } from 'vite';
+export default defineConfig({
+  build: {
+    target: 'esnext'
+  }
+});
diff --git a/examples/webgpu-embedding-benchmark/.gitignore b/examples/webgpu-embedding-benchmark/.gitignore
new file mode 100644
index 000000000..a547bf36d
--- /dev/null
+++ b/examples/webgpu-embedding-benchmark/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/webgpu-embedding-benchmark/index.html b/examples/webgpu-embedding-benchmark/index.html
new file mode 100644
index 000000000..8b4a9d361
--- /dev/null
+++ b/examples/webgpu-embedding-benchmark/index.html
@@ -0,0 +1,64 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>Transformers.js | WebGPU Benchmark</title>
+</head>
+
+<body>
+  <h1>
+    <a href="https://github.com/huggingface/transformers.js" target="_blank">🤗 Transformers.js</a> WebGPU Benchmark
+  </h1>
+  <p>
+    This benchmark measures the execution time of BERT-based embedding models
+    using the WASM and WebGPU execution providers across different batch sizes.
+  </p>
+  <div id="chart-container">
+    <canvas id="chart"></canvas>
+  </div>
+  <div>
+    <button id="start" disabled>Start Benchmark</button>
+    <button id="stop" disabled>Stop Benchmark</button>
+  </div>
+  <label id="status"></label>
+  <details open>
+    <summary>Options</summary>
+    <div>
+      <input class="tests" type="checkbox" value="WASM (int8)" data-color="33,150,243" data-device="wasm"
+        data-dtype="int8"> WASM (int8)<br />
+      <input class="tests" type="checkbox" value="WASM (fp16)" data-color="63,81,181" data-device="wasm"
+        data-dtype="fp16"> WASM (fp16)<br />
+      <input class="tests" type="checkbox" value="WASM (fp32)" data-color="46,204,113" data-device="wasm"
+        data-dtype="fp32" checked> WASM (fp32)<br />
+      <!-- <input class="tests" type="checkbox" value="WebGPU (int8)" data-color="233,30,99" data-device="webgpu"
+        data-dtype="int8"> WebGPU (int8)<br /> -->
+      <input class="tests" type="checkbox" value="WebGPU (fp16)" data-color="255,193,7" data-device="webgpu"
+        data-dtype="fp16"> WebGPU (fp16)<br />
+      <input class="tests" type="checkbox" value="WebGPU (fp32)" data-color="0,150,136" data-device="webgpu"
+        data-dtype="fp32" checked> WebGPU (fp32)<br />
+    </div>
+    <hr />
+    <div>
+      <label>Model ID</label>
+      <input id="model-id" value="Xenova/all-MiniLM-L6-v2" />
+    </div>
+    <div>
+      <label>Batch sizes</label>
+      <input id="batch-sizes" value="1, 2, 4, 8, 16, 32" />
+    </div>
+    <div>
+      <label>Sequence length</label>
+      <input id="sequence-length" type="number" min="1" max="512" value="512" />
+    </div>
+    <hr />
+    <div>
+      <input id="x-scale" type="checkbox" /> Log scale (x) <br />
+      <input id="y-scale" type="checkbox" /> Log scale (y) <br />
+    </div>
+  </details>
+  <script type="module" src="/main.js"></script>
+</body>
+
+</html>
\ No newline at end of file
diff --git a/examples/webgpu-embedding-benchmark/main.js b/examples/webgpu-embedding-benchmark/main.js
new file mode 100644
index 000000000..bdf731395
--- /dev/null
+++ b/examples/webgpu-embedding-benchmark/main.js
@@ -0,0 +1,305 @@
+import './style.css';
+import { env, AutoModel, ones } from '@xenova/transformers';
+import Chart from 'chart.js/auto';
+
+// Throw an error if WebGPU is not supported
+if (!navigator.gpu) {
+  const err = 'WebGPU is not supported by this browser.';
+  alert(err)
+  throw Error(err);
+}
+
+env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.1/dist/';
+env.backends.onnx.wasm.numThreads = 1;
+
+// Reference the elements that we will need
+const ctx = document.getElementById('chart');
+const batchSizes = document.getElementById('batch-sizes');
+const xscale = document.getElementById('x-scale');
+const yscale = document.getElementById('y-scale');
+const sequenceLength = document.getElementById('sequence-length');
+const modelID = document.getElementById('model-id');
+const status = document.getElementById('status');
+const start = document.getElementById('start');
+const stop = document.getElementById('stop');
+const tests = document.getElementsByClassName('tests');
+
+// Benchmark settings
+const NUM_WARMUP_STEPS = 3;
+const MODEL_CACHE = new Map();
+
+// Chart configuration
+const initChart = () => {
+  const config = {
+    type: 'line',
+    data: {
+      labels: [],
+      datasets: [],
+    },
+    options: {
+      responsive: true,
+      maintainAspectRatio: false,
+      plugins: {
+        legend: {
+          position: 'top',
+        },
+      },
+      scales: {
+        x: {
+          title: {
+            display: true,
+            text: 'Batch size',
+          },
+          min: 1,
+        },
+        y: {
+          title: {
+            display: true,
+            text: 'Time (ms)',
+          },
+        }
+      }
+    },
+  };
+  const chart = new Chart(ctx, config);
+  return chart;
+}
+let chart = initChart();
+const toggleScale = (axis, enabled) => {
+  chart.options.scales[axis].type = enabled ? 'logarithmic' : 'linear';
+  chart.update();
+}
+
+const getSelectedTests = () => {
+  return [...tests].filter(x => x.checked);
+}
+
+const updateDatasets = () => {
+  chart.data.datasets = getSelectedTests().map(test => {
+    const color = test.getAttribute('data-color');
+    return {
+      label: test.value,
+      data: [],
+      borderColor: `rgba(${color}, 1)`,
+      backgroundColor: `rgba(${color}, 0.5)`,
+    }
+  })
+  chart.update();
+}
+updateDatasets();
+[...tests].forEach(test => test.addEventListener('change', updateDatasets));
+
+xscale.addEventListener('change', () => toggleScale('x', xscale.checked));
+yscale.addEventListener('change', () => toggleScale('y', yscale.checked));
+
+const generateDummyInputs = (batch_size, seqLength) => {
+  const inputs = ones([batch_size, seqLength]);
+
+  const model_inputs = {
+    input_ids: inputs,
+    attention_mask: inputs,
+  }
+  return model_inputs;
+}
+
+let adapterInfo;
+let gpuHasFp16 = false;
+try {
+  // Shouldn't fail since the WebGPU model has loaded successfully
+  const adapter = await navigator.gpu.requestAdapter();
+  adapterInfo = await adapter.requestAdapterInfo();
+  gpuHasFp16 = adapter.features.has('shader-f16')
+} catch (err) {
+  adapterInfo = {};
+}
+if (!gpuHasFp16) {
+  const element = document.querySelector('.tests[data-device="webgpu"][data-dtype="fp16"]');
+  element.setAttribute('unsupported', true);
+  element.disabled = true;
+  element.title = 'This device does not support fp16 on WebGPU';
+}
+
+status.textContent = 'Ready';
+
+let interrupted = false;
+start.addEventListener('click', async () => {
+  const validTests = [...tests].filter(test => !test.getAttribute('unsupported'))
+  // Update UI
+  start.disabled = true;
+  stop.disabled = false;
+  batchSizes.disabled = true;
+  sequenceLength.disabled = true;
+  modelID.disabled = true;
+  validTests.forEach(test => test.disabled = true);
+  interrupted = false;
+
+  // Get parameters
+  const model_id = modelID.value;
+  const batch_sizes = batchSizes.value.split(',').map(x => parseInt(x)).filter(x => x);
+  const seqLength = parseInt(sequenceLength.value);
+  const selectedTests = getSelectedTests().map(x => ({
+    label: x.value,
+    dtype: x.getAttribute('data-dtype'),
+    device: x.getAttribute('data-device'),
+  }));
+
+  // Reset
+  chart.destroy();
+  chart = initChart();
+  updateDatasets();
+
+  // NOTE: Models must be loaded sequentially (otherwise it will fail due to multiple calls to initWasm())
+  const testsToRun = new Map();
+  for (const test of selectedTests) {
+    const { label, dtype, device, quantized } = test;
+
+    const key = `${model_id}///${label}`;
+
+    const cached = MODEL_CACHE.get(key);
+    if (cached) {
+      testsToRun.set(label, cached);
+      continue;
+    }
+    status.textContent = 'Loading model(s)...';
+
+    try {
+      const model = await AutoModel.from_pretrained(model_id, {
+        quantized,
+        device,
+        dtype,
+      });
+      MODEL_CACHE.set(key, model);
+      testsToRun.set(label, model);
+    } catch (err) {
+      status.textContent = err.message;
+      alert(err.message)
+      throw err;
+    }
+  }
+
+  status.textContent = 'Warming up...';
+
+  // Warm up: This is important for the WebGPU execution provider, which compiles the shaders on first load
+  for (let i = 0; i < NUM_WARMUP_STEPS; ++i) {
+    const model_inputs = generateDummyInputs(1, seqLength);
+    for (const [label, model] of testsToRun) {
+      await model(model_inputs);
+    }
+  }
+
+  status.textContent = 'Running benchmark...';
+
+  for (const batch_size of batch_sizes) {
+    if (interrupted) break;
+
+    const model_inputs = generateDummyInputs(batch_size, seqLength);
+
+    const times = []
+
+    for (const [label, model] of testsToRun) {
+      const start = performance.now();
+      await model(model_inputs);
+      const end = performance.now();
+      times.push(end - start);
+    }
+
+    chart.data.labels.push(batch_size);
+    for (let i = 0; i < times.length; ++i) {
+      chart.data.datasets[i].data.push(times[i]);
+    }
+    chart.update();
+  }
+
+  // Calculate max speedup:
+  if (chart.data.labels.length === 0) return;
+
+  const testNames = [...testsToRun.keys()];
+  const table = generateResultsTable(model_id, testNames, chart.data, seqLength);
+
+
+  // Calculate slowest and fastest times
+  let minMaxTimes = [Infinity, 0];
+  let minMaxIndices = [0, 0];
+  for (let i = 0; i < chart.data.datasets.length; i++) {
+    const lastTime = chart.data.datasets[i].data.at(-1);
+    if (lastTime < minMaxTimes[0]) {
+      minMaxTimes[0] = lastTime;
+      minMaxIndices[0] = i;
+    }
+    if (lastTime > minMaxTimes[1]) {
+      minMaxTimes[1] = lastTime;
+      minMaxIndices[1] = i;
+    }
+  }
+
+  const speedup = minMaxTimes[1] / minMaxTimes[0];
+  const roundedSpeedup = speedup.toFixed(2);
+  const params = new URLSearchParams({
+    title: `⚡ WebGPU Benchmark Results (${roundedSpeedup}x speedup)`,
+    description: table.outerHTML,
+  });
+
+  const paramsStr = params.toString();
+  status.innerHTML = `⚡ Done! ${testNames.at(minMaxIndices[0])} is <strong>${roundedSpeedup}x</strong> faster than ${testNames.at(minMaxIndices[1])}! ⚡<br><a href="https://huggingface.co/spaces/Xenova/webgpu-embedding-benchmark/discussions/new?${paramsStr}" target="_blank">Share results</a>`;
+  start.disabled = false;
+  stop.disabled = true;
+  batchSizes.disabled = false;
+  sequenceLength.disabled = false;
+  modelID.disabled = false;
+  validTests.forEach(test => test.disabled = false);
+});
+
+start.disabled = false;
+
+stop.addEventListener('click', () => {
+  status.textContent = 'Stopping...';
+  interrupted = true;
+  stop.disabled = true;
+});
+
+function generateResultsTable(model_id, testNames, data, sequence_length) {
+
+  const datasets = data.datasets.map(d => d.data);
+  const batch_sizes = data.labels;
+
+  const container = document.createElement('div');
+
+  const table = document.createElement('table');
+  const thead = table.createTHead();
+  const tbody = table.createTBody();
+
+  // Add header row
+  const headerRow = thead.insertRow();
+  headerRow.insertCell().textContent = 'Batch Size';
+  testNames.forEach(model => {
+    headerRow.insertCell().textContent = model;
+  });
+
+  // Add data rows
+  batch_sizes.forEach((batchSize, rowIndex) => {
+    const row = tbody.insertRow();
+    row.insertCell().textContent = batchSize;
+    datasets.forEach(dataset => {
+      row.insertCell().textContent = dataset[rowIndex].toFixed(2);
+    });
+  });
+
+  container.appendChild(table);
+
+  const createBulletPoint = (text) => {
+    const li = document.createElement('li');
+    li.textContent = text;
+    return li;
+  }
+
+  // Add other information
+  const info = document.createElement('ul');
+  info.appendChild(createBulletPoint(`Model: ${model_id}`));
+  info.appendChild(createBulletPoint(`Tests run: ${testNames.join(', ')}`));
+  info.appendChild(createBulletPoint(`Sequence length: ${sequence_length}`));
+  info.appendChild(createBulletPoint(`Browser: ${navigator.userAgent}`));
+  info.appendChild(createBulletPoint(`GPU: vendor=${adapterInfo.vendor}, architecture=${adapterInfo.architecture}, device=${adapterInfo.device}, description=${adapterInfo.description}`));
+  container.appendChild(info);
+
+  return container;
+}
diff --git a/examples/webgpu-embedding-benchmark/package.json b/examples/webgpu-embedding-benchmark/package.json
new file mode 100644
index 000000000..d90288d7a
--- /dev/null
+++ b/examples/webgpu-embedding-benchmark/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "webgpu-embedding-benchmark",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "devDependencies": {
+    "vite": "^5.0.12"
+  },
+  "dependencies": {
+    "@xenova/transformers": "^3.0.0",
+    "chart.js": "^4.4.2"
+  }
+}
diff --git a/examples/webgpu-embedding-benchmark/style.css b/examples/webgpu-embedding-benchmark/style.css
new file mode 100644
index 000000000..9253d75e3
--- /dev/null
+++ b/examples/webgpu-embedding-benchmark/style.css
@@ -0,0 +1,87 @@
+* {
+  box-sizing: border-box;
+  padding: 0;
+  margin: 0;
+  font-family: sans-serif;
+}
+
+html,
+body {
+  height: 100%;
+}
+
+body {
+  padding: 16px 32px;
+  display: flex;
+  flex-direction: column;
+  justify-content: center;
+  align-items: center;
+}
+
+h1 {
+  text-align: center;
+}
+
+#status {
+  min-height: 16px;
+  margin: 8px 0;
+  text-align: center;
+}
+
+button {
+  transition: all .25s;
+  background: rgba(40, 44, 52, 0.05);
+  border: 1px solid transparent;
+  border-radius: 6px;
+  color: #3080d0;
+  text-decoration: none !important;
+  display: inline-block;
+  font-size: 14px;
+  font-weight: 500;
+  padding: 8px 16px;
+  cursor: pointer;
+  -webkit-user-select: none;
+  -moz-user-select: none;
+  user-select: none;
+}
+
+button:disabled {
+  background: rgba(40, 44, 52, 0.1);
+  color: #a0a0a0;
+  cursor: not-allowed;
+}
+
+button:hover {
+  background: rgba(40, 44, 52, 0.1);
+}
+
+p {
+  text-align: center;
+  font-size: 12px;
+  max-width: 600px;
+  padding: 8px;
+}
+
+#chart-container {
+  position: relative;
+  height: 60vh;
+  width: min(90vw, 800px);
+  padding-right: 50px;
+  margin-bottom: 10px;
+}
+
+details {
+  position: fixed;
+  background-color: white;
+  right: 0;
+  top: 0;
+  padding: 16px;
+}
+
+summary {
+  text-align: right;
+}
+
+hr {
+  margin: 8px 0;
+}
diff --git a/examples/webgpu-embedding-benchmark/vite.config.js b/examples/webgpu-embedding-benchmark/vite.config.js
new file mode 100644
index 000000000..6c32f52df
--- /dev/null
+++ b/examples/webgpu-embedding-benchmark/vite.config.js
@@ -0,0 +1,6 @@
+import { defineConfig } from 'vite';
+export default defineConfig({
+  build: {
+    target: 'esnext'
+  }
+});
diff --git a/examples/webgpu-video-background-removal/.gitignore b/examples/webgpu-video-background-removal/.gitignore
new file mode 100644
index 000000000..a547bf36d
--- /dev/null
+++ b/examples/webgpu-video-background-removal/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/webgpu-video-background-removal/index.html b/examples/webgpu-video-background-removal/index.html
new file mode 100644
index 000000000..8e71df5a9
--- /dev/null
+++ b/examples/webgpu-video-background-removal/index.html
@@ -0,0 +1,43 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>Transformers.js | Real-time background removal</title>
+</head>
+
+<body>
+  <h1>
+    Real-time background removal w/
+    <a href="https://github.com/huggingface/transformers.js" target="_blank">🤗 Transformers.js</a>
+  </h1>
+  <h4>
+    Runs locally in your browser, powered by
+    <a href="https://huggingface.co/Xenova/modnet" target="_blank">MODNet</a>
+  </h4>
+  <div id="container">
+    <video id="video" autoplay muted playsinline></video>
+    <canvas id="canvas" width="360" height="240"></canvas>
+    <canvas id="output-canvas" width="360" height="240"></canvas>
+  </div>
+  <div id="controls">
+    <div title="Read frames from your webcam and process them at a lower size (lower = faster)">
+      <label>Stream scale</label>
+      (<label id="scale-value">0.5</label>)
+      <br>
+      <input id="scale" type="range" min="0.1" max="1" step="0.1" value="0.5" disabled>
+    </div>
+    <div title="The length of the shortest edge of the image (lower = faster)">
+      <label>Image size</label>
+      (<label id="size-value">256</label>)
+      <br>
+      <input id="size" type="range" min="64" max="512" step="32" value="256" disabled>
+    </div>
+  </div>
+  <label id="status"></label>
+
+  <script type="module" src="/main.js"></script>
+</body>
+
+</html>
\ No newline at end of file
diff --git a/examples/webgpu-video-background-removal/main.js b/examples/webgpu-video-background-removal/main.js
new file mode 100644
index 000000000..620f21afb
--- /dev/null
+++ b/examples/webgpu-video-background-removal/main.js
@@ -0,0 +1,128 @@
+import './style.css';
+
+import { env, AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';
+
+env.backends.onnx.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.1/dist/';
+env.backends.onnx.wasm.numThreads = 1;
+
+// Reference the elements that we will need
+const status = document.getElementById('status');
+const container = document.getElementById('container');
+const canvas = document.getElementById('canvas');
+const outputCanvas = document.getElementById('output-canvas');
+const video = document.getElementById('video');
+const sizeSlider = document.getElementById('size');
+const sizeLabel = document.getElementById('size-value');
+const scaleSlider = document.getElementById('scale');
+const scaleLabel = document.getElementById('scale-value');
+
+function setStreamSize(width, height) {
+    video.width = outputCanvas.width = canvas.width = Math.round(width);
+    video.height = outputCanvas.height = canvas.height = Math.round(height);
+}
+
+status.textContent = 'Loading model...';
+
+// Load model and processor
+const model_id = 'Xenova/modnet';
+let model;
+try {
+    model = await AutoModel.from_pretrained(model_id, {
+        device: 'webgpu',
+        dtype: 'fp32', // TODO: add fp16 support
+    });
+} catch (err) {
+    status.textContent = err.message;
+    alert(err.message)
+    throw err;
+}
+
+const processor = await AutoProcessor.from_pretrained(model_id);
+
+// Set up controls
+let size = 256;
+processor.feature_extractor.size = { shortest_edge: size };
+sizeSlider.addEventListener('input', () => {
+    size = Number(sizeSlider.value);
+    processor.feature_extractor.size = { shortest_edge: size };
+    sizeLabel.textContent = size;
+});
+sizeSlider.disabled = false;
+
+let scale = 0.5;
+scaleSlider.addEventListener('input', () => {
+    scale = Number(scaleSlider.value);
+    setStreamSize(video.videoWidth * scale, video.videoHeight * scale);
+    scaleLabel.textContent = scale;
+});
+scaleSlider.disabled = false;
+
+status.textContent = 'Ready';
+
+let isProcessing = false;
+let previousTime;
+const context = canvas.getContext('2d', { willReadFrequently: true });
+const outputContext = outputCanvas.getContext('2d', { willReadFrequently: true });
+function updateCanvas() {
+    const { width, height } = canvas;
+
+    if (!isProcessing) {
+        isProcessing = true;
+        (async function () {
+            // Read the current frame from the video
+            context.drawImage(video, 0, 0, width, height);
+            const currentFrame = context.getImageData(0, 0, width, height);
+            const image = new RawImage(currentFrame.data, width, height, 4);
+
+            // Pre-process image
+            const inputs = await processor(image);
+
+            // Predict alpha matte
+            const { output } = await model({ input: inputs.pixel_values });
+
+            const mask = await RawImage.fromTensor(output[0].mul(255).to('uint8')).resize(width, height);
+
+            // Update alpha channel
+            const outPixelData = currentFrame;
+            for (let i = 0; i < mask.data.length; ++i) {
+                outPixelData.data[4 * i + 3] = mask.data[i];
+            }
+            outputContext.putImageData(outPixelData, 0, 0);
+
+            if (previousTime !== undefined) {
+                const fps = 1000 / (performance.now() - previousTime);
+                status.textContent = `FPS: ${fps.toFixed(2)}`;
+            }
+            previousTime = performance.now();
+
+            isProcessing = false;
+        })();
+    }
+
+    window.requestAnimationFrame(updateCanvas);
+}
+
+// Start the video stream
+navigator.mediaDevices.getUserMedia(
+    { video: true }, // Ask for video
+).then((stream) => {
+    // Set up the video and canvas elements.
+    video.srcObject = stream;
+    video.play();
+
+    const videoTrack = stream.getVideoTracks()[0];
+    const { width, height } = videoTrack.getSettings();
+
+    setStreamSize(width * scale, height * scale);
+
+    // Set container width and height depending on the image aspect ratio
+    const ar = width / height;
+    const [cw, ch] = (ar > 720 / 405) ? [720, 720 / ar] : [405 * ar, 405];
+    container.style.width = `${cw}px`;
+    container.style.height = `${ch}px`;
+
+    // Start the animation loop
+    setTimeout(updateCanvas, 50);
+}).catch((error) => {
+    alert(error);
+});
diff --git a/examples/webgpu-video-background-removal/package.json b/examples/webgpu-video-background-removal/package.json
new file mode 100644
index 000000000..9ebe47afe
--- /dev/null
+++ b/examples/webgpu-video-background-removal/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "webgpu-video-background-removal",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "devDependencies": {
+    "vite": "^5.0.12"
+  },
+  "dependencies": {
+    "@xenova/transformers": "^3.0.0"
+  }
+}
diff --git a/examples/webgpu-video-background-removal/style.css b/examples/webgpu-video-background-removal/style.css
new file mode 100644
index 000000000..a86729e1c
--- /dev/null
+++ b/examples/webgpu-video-background-removal/style.css
@@ -0,0 +1,87 @@
+* {
+  box-sizing: border-box;
+  padding: 0;
+  margin: 0;
+  font-family: sans-serif;
+}
+
+html,
+body {
+  height: 100%;
+}
+
+body {
+  padding: 16px 32px;
+}
+
+body,
+#container {
+  display: flex;
+  flex-direction: column;
+  justify-content: center;
+  align-items: center;
+}
+
+#controls {
+  display: flex;
+  padding: 1rem;
+  gap: 1rem;
+}
+
+#controls>div {
+  text-align: center;
+}
+
+h1,
+h4 {
+  text-align: center;
+}
+
+h4 {
+  margin-top: 0.5rem;
+}
+
+#container {
+  position: relative;
+  width: 720px;
+  height: 405px;
+  max-width: 100%;
+  max-height: 100%;
+  border: 2px dashed #D1D5DB;
+  border-radius: 0.75rem;
+  overflow: hidden;
+  margin-top: 1rem;
+  background-size: 100% 100%;
+  background-position: center;
+  background-repeat: no-repeat;
+}
+
+#overlay,
+canvas {
+  position: absolute;
+  width: 100%;
+  height: 100%;
+}
+
+#status {
+  min-height: 16px;
+  margin: 8px 0;
+}
+
+.bounding-box {
+  position: absolute;
+  box-sizing: border-box;
+  border: solid 2px;
+}
+
+.bounding-box-label {
+  color: white;
+  position: absolute;
+  font-size: 12px;
+  margin: -16px 0 0 -2px;
+  padding: 1px;
+}
+
+#video, #canvas {
+  display: none;
+}
diff --git a/examples/webgpu-video-background-removal/vite.config.js b/examples/webgpu-video-background-removal/vite.config.js
new file mode 100644
index 000000000..6c32f52df
--- /dev/null
+++ b/examples/webgpu-video-background-removal/vite.config.js
@@ -0,0 +1,6 @@
+import { defineConfig } from 'vite';
+export default defineConfig({
+  build: {
+    target: 'esnext'
+  }
+});
diff --git a/examples/webgpu-video-depth-estimation/.gitignore b/examples/webgpu-video-depth-estimation/.gitignore
new file mode 100644
index 000000000..a547bf36d
--- /dev/null
+++ b/examples/webgpu-video-depth-estimation/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/webgpu-video-depth-estimation/index.html b/examples/webgpu-video-depth-estimation/index.html
new file mode 100644
index 000000000..c05574f67
--- /dev/null
+++ b/examples/webgpu-video-depth-estimation/index.html
@@ -0,0 +1,42 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>Transformers.js | Real-time depth estimation</title>
+</head>
+
+<body>
+  <h1>
+    Real-time depth estimation w/
+    <a href="https://huggingface.co/onnx-community/depth-anything-v2-small" target="_blank">Depth Anything V2</a>
+  </h1>
+  <h3>
+    Runs locally in your browser, powered by
+    <a href="https://github.com/huggingface/transformers.js" target="_blank">🤗 Transformers.js</a>
+  </h3>
+  <div id="container">
+    <video id="video" autoplay muted playsinline></video>
+    <canvas id="output-canvas"></canvas>
+  </div>
+  <div id="controls">
+    <div title="Read frames from your webcam and process them at a lower size (lower = faster)">
+      <label>Stream scale</label>
+      (<label id="scale-value">0.4</label>)
+      <br>
+      <input id="scale" type="range" min="0.1" max="1" step="0.1" value="0.4" disabled>
+    </div>
+    <div title="The width of the image (lower = faster)">
+      <label>Image size</label>
+      (<label id="size-value">504</label>px)
+      <br>
+      <input id="size" type="range" min="140" max="840" step="14" value="504" disabled>
+    </div>
+  </div>
+  <label id="status">Loading model...</label>
+
+  <script type="module" src="/main.js"></script>
+</body>
+
+</html>
\ No newline at end of file
diff --git a/examples/webgpu-video-depth-estimation/main.js b/examples/webgpu-video-depth-estimation/main.js
new file mode 100644
index 000000000..a745da774
--- /dev/null
+++ b/examples/webgpu-video-depth-estimation/main.js
@@ -0,0 +1,145 @@
+import './style.css';
+
+import { AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';
+
+async function hasFp16() {
+    try {
+        const adapter = await navigator.gpu.requestAdapter()
+        return adapter.features.has('shader-f16')
+    } catch (e) {
+        return false
+    }
+}
+
+// Reference the elements that we will need
+const status = document.getElementById('status');
+const canvas = document.createElement('canvas');
+const outputCanvas = document.getElementById('output-canvas');
+const video = document.getElementById('video');
+const sizeSlider = document.getElementById('size');
+const sizeLabel = document.getElementById('size-value');
+const scaleSlider = document.getElementById('scale');
+const scaleLabel = document.getElementById('scale-value');
+
+function setStreamSize(width, height) {
+    video.width = outputCanvas.width = canvas.width = Math.round(width);
+    video.height = outputCanvas.height = canvas.height = Math.round(height);
+}
+
+status.textContent = 'Loading model...';
+
+// Load model and processor
+const model_id = 'onnx-community/depth-anything-v2-small';
+
+let model;
+try {
+    model = await AutoModel.from_pretrained(model_id, {
+        device: 'webgpu',
+        // Use fp16 if available, otherwise use fp32
+        dtype: (await hasFp16()) ? 'fp16' : 'fp32',
+    });
+} catch (err) {
+    status.textContent = err.message;
+    alert(err.message)
+    throw err;
+}
+
+const processor = await AutoProcessor.from_pretrained(model_id);
+
+// Set up controls
+let size = 504;
+processor.feature_extractor.size = { width: size, height: size };
+sizeSlider.addEventListener('input', () => {
+    size = Number(sizeSlider.value);
+    processor.feature_extractor.size = { width: size, height: size };
+    sizeLabel.textContent = size;
+});
+sizeSlider.disabled = false;
+
+let scale = 0.4;
+scaleSlider.addEventListener('input', () => {
+    scale = Number(scaleSlider.value);
+    setStreamSize(video.videoWidth * scale, video.videoHeight * scale);
+    scaleLabel.textContent = scale;
+});
+scaleSlider.disabled = false;
+
+status.textContent = 'Ready';
+
+let isProcessing = false;
+let previousTime;
+const context = canvas.getContext('2d', { willReadFrequently: true });
+const outputContext = outputCanvas.getContext('2d', { willReadFrequently: true });
+function updateCanvas() {
+    const { width, height } = canvas;
+
+    if (!isProcessing) {
+        isProcessing = true;
+        (async function () {
+            // Read the current frame from the video
+            context.drawImage(video, 0, 0, width, height);
+            const currentFrame = context.getImageData(0, 0, width, height);
+            const image = new RawImage(currentFrame.data, width, height, 4);
+
+            // Pre-process image
+            const inputs = await processor(image);
+
+            // Predict depth map
+            const { predicted_depth } = await model(inputs);
+            const data = predicted_depth.data;
+            const [bs, oh, ow] = predicted_depth.dims;
+
+            // Normalize the depth map
+            let min = Infinity;
+            let max = -Infinity;
+            outputCanvas.width = ow;
+            outputCanvas.height = oh;
+            for (let i = 0; i < data.length; ++i) {
+                const v = data[i];
+                if (v < min) min = v;
+                if (v > max) max = v;
+            }
+            const range = max - min;
+
+            const imageData = new Uint8ClampedArray(4 * data.length);
+            for (let i = 0; i < data.length; ++i) {
+                const offset = 4 * i;
+                imageData[offset] = 255; // Set base color to red
+
+                // Set alpha to normalized depth value
+                imageData[offset + 3] = 255 * (1 - (data[i] - min) / range);
+            }
+            const outPixelData = new ImageData(imageData, ow, oh);
+            outputContext.putImageData(outPixelData, 0, 0);
+
+            if (previousTime !== undefined) {
+                const fps = 1000 / (performance.now() - previousTime);
+                status.textContent = `FPS: ${fps.toFixed(2)}`;
+            }
+            previousTime = performance.now();
+
+            isProcessing = false;
+        })();
+    }
+
+    window.requestAnimationFrame(updateCanvas);
+}
+
+// Start the video stream
+navigator.mediaDevices.getUserMedia(
+    { video: { width: 720, height: 720 } }, // Ask for square video
+).then((stream) => {
+    // Set up the video and canvas elements.
+    video.srcObject = stream;
+    video.play();
+
+    const videoTrack = stream.getVideoTracks()[0];
+    const { width, height } = videoTrack.getSettings();
+
+    setStreamSize(width * scale, height * scale);
+
+    // Start the animation loop
+    setTimeout(updateCanvas, 50);
+}).catch((error) => {
+    alert(error);
+});
diff --git a/examples/webgpu-video-depth-estimation/package.json b/examples/webgpu-video-depth-estimation/package.json
new file mode 100644
index 000000000..041dd86e0
--- /dev/null
+++ b/examples/webgpu-video-depth-estimation/package.json
@@ -0,0 +1,17 @@
+{
+  "name": "webgpu-video-depth-estimation",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "devDependencies": {
+    "vite": "^5.2.0"
+  },
+  "dependencies": {
+    "@xenova/transformers": "github:xenova/transformers.js#v3"
+  }
+}
diff --git a/examples/webgpu-video-depth-estimation/style.css b/examples/webgpu-video-depth-estimation/style.css
new file mode 100644
index 000000000..bd4796b95
--- /dev/null
+++ b/examples/webgpu-video-depth-estimation/style.css
@@ -0,0 +1,71 @@
+* {
+  box-sizing: border-box;
+  padding: 0;
+  margin: 0;
+  font-family: sans-serif;
+}
+
+html,
+body {
+  height: 100%;
+}
+
+body {
+  padding: 16px 32px;
+}
+
+body,
+#container {
+  display: flex;
+  flex-direction: column;
+  justify-content: center;
+  align-items: center;
+}
+
+#controls {
+  display: flex;
+  padding: 1rem;
+  gap: 1rem;
+}
+
+#controls>div {
+  text-align: center;
+}
+
+h1,
+h3 {
+  text-align: center;
+}
+
+h3 {
+  margin-top: 0.5rem;
+}
+
+#container {
+  display: flex;
+  flex-direction: row;
+  position: relative;
+  max-width: 100%;
+  max-height: 100%;
+  border: 2px dashed #D1D5DB;
+  border-radius: 0.75rem;
+  overflow: hidden;
+  margin-top: 1rem;
+  background-size: 100% 100%;
+  background-position: center;
+  background-repeat: no-repeat;
+}
+#video, #output-canvas {
+  width: 504px;
+  height: 504px;
+}
+
+canvas {
+  width: 100%;
+  height: 100%;
+}
+
+#status {
+  min-height: 16px;
+  margin: 8px 0;
+}
diff --git a/examples/webgpu-video-depth-estimation/vite.config.js b/examples/webgpu-video-depth-estimation/vite.config.js
new file mode 100644
index 000000000..6c32f52df
--- /dev/null
+++ b/examples/webgpu-video-depth-estimation/vite.config.js
@@ -0,0 +1,6 @@
+import { defineConfig } from 'vite';
+export default defineConfig({
+  build: {
+    target: 'esnext'
+  }
+});
diff --git a/examples/webgpu-vlm/.eslintrc.cjs b/examples/webgpu-vlm/.eslintrc.cjs
new file mode 100644
index 000000000..ce8fffe57
--- /dev/null
+++ b/examples/webgpu-vlm/.eslintrc.cjs
@@ -0,0 +1,21 @@
+module.exports = {
+  root: true,
+  env: { browser: true, es2020: true },
+  extends: [
+    'eslint:recommended',
+    'plugin:react/recommended',
+    'plugin:react/jsx-runtime',
+    'plugin:react-hooks/recommended',
+  ],
+  ignorePatterns: ['dist', '.eslintrc.cjs'],
+  parserOptions: { ecmaVersion: 'latest', sourceType: 'module' },
+  settings: { react: { version: '18.2' } },
+  plugins: ['react-refresh'],
+  rules: {
+    'react-refresh/only-export-components': [
+      'warn',
+      { allowConstantExport: true },
+    ],
+    'react/prop-types': 'off'
+  },
+}
diff --git a/examples/webgpu-vlm/.gitignore b/examples/webgpu-vlm/.gitignore
new file mode 100644
index 000000000..a547bf36d
--- /dev/null
+++ b/examples/webgpu-vlm/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/webgpu-vlm/README.md b/examples/webgpu-vlm/README.md
new file mode 100644
index 000000000..f768e33fc
--- /dev/null
+++ b/examples/webgpu-vlm/README.md
@@ -0,0 +1,8 @@
+# React + Vite
+
+This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
+
+Currently, two official plugins are available:
+
+- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
+- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
diff --git a/examples/webgpu-vlm/index.html b/examples/webgpu-vlm/index.html
new file mode 100644
index 000000000..4ed94aa49
--- /dev/null
+++ b/examples/webgpu-vlm/index.html
@@ -0,0 +1,13 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Moondream WebGPU</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.jsx"></script>
+  </body>
+</html>
diff --git a/examples/webgpu-vlm/package.json b/examples/webgpu-vlm/package.json
new file mode 100644
index 000000000..34e6e95e6
--- /dev/null
+++ b/examples/webgpu-vlm/package.json
@@ -0,0 +1,32 @@
+{
+  "name": "webgpu-chat",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "@xenova/transformers": "github:xenova/transformers.js#v3",
+    "dompurify": "^3.1.2",
+    "marked": "^12.0.2",
+    "react": "^18.2.0",
+    "react-dom": "^18.2.0"
+  },
+  "devDependencies": {
+    "@types/react": "^18.2.43",
+    "@types/react-dom": "^18.2.17",
+    "@vitejs/plugin-react": "^4.2.1",
+    "autoprefixer": "^10.4.19",
+    "eslint": "^8.55.0",
+    "eslint-plugin-react": "^7.33.2",
+    "eslint-plugin-react-hooks": "^4.6.0",
+    "eslint-plugin-react-refresh": "^0.4.5",
+    "postcss": "^8.4.38",
+    "tailwindcss": "^3.4.3",
+    "vite": "^5.2.11"
+  }
+}
diff --git a/examples/webgpu-vlm/postcss.config.js b/examples/webgpu-vlm/postcss.config.js
new file mode 100644
index 000000000..2e7af2b7f
--- /dev/null
+++ b/examples/webgpu-vlm/postcss.config.js
@@ -0,0 +1,6 @@
+export default {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+}
diff --git a/examples/webgpu-vlm/public/logo.png b/examples/webgpu-vlm/public/logo.png
new file mode 100644
index 000000000..ee800de7f
Binary files /dev/null and b/examples/webgpu-vlm/public/logo.png differ
diff --git a/examples/webgpu-vlm/src/App.jsx b/examples/webgpu-vlm/src/App.jsx
new file mode 100644
index 000000000..08cf1797b
--- /dev/null
+++ b/examples/webgpu-vlm/src/App.jsx
@@ -0,0 +1,318 @@
+import { useEffect, useState, useRef } from 'react';
+
+import Chat from './components/Chat';
+import ArrowRightIcon from './components/icons/ArrowRightIcon';
+import StopIcon from './components/icons/StopIcon';
+import Progress from './components/Progress';
+import ImageIcon from './components/icons/ImageIcon';
+import ImagePreview from './components/ImagePreview';
+
+const IS_WEBGPU_AVAILABLE = !!navigator.gpu;
+const STICKY_SCROLL_THRESHOLD = 120;
+
+function App() {
+
+  // Create a reference to the worker object.
+  const worker = useRef(null);
+
+  const textareaRef = useRef(null);
+  const chatContainerRef = useRef(null);
+  const imageRef = useRef(null);
+  const imageUploadRef = useRef(null);
+
+  // Model loading and progress
+  const [status, setStatus] = useState(null);
+  const [loadingMessage, setLoadingMessage] = useState('');
+  const [progressItems, setProgressItems] = useState([]);
+  const [isRunning, setIsRunning] = useState(false);
+
+  // Inputs and outputs
+  const [input, setInput] = useState('');
+  const [image, setImage] = useState(null);
+  const [messages, setMessages] = useState([]);
+  const [tps, setTps] = useState(null);
+  const [numTokens, setNumTokens] = useState(null);
+
+  function onEnter(message, image = null) {
+    setMessages(prev => [
+      ...prev,
+      { role: "user", content: message, image },
+    ]);
+    setTps(null);
+    setIsRunning(true);
+    setInput('');
+    setImage(null);
+  }
+
+  useEffect(() => {
+    resizeInput();
+  }, [input]);
+
+  function onInterrupt() {
+    // NOTE: We do not set isRunning to false here because the worker
+    // will send a 'complete' message when it is done.
+    worker.current.postMessage({ type: 'interrupt' });
+  }
+
+  function resizeInput() {
+    if (!textareaRef.current) return;
+
+    const target = textareaRef.current;
+    target.style.height = 'auto';
+    const newHeight = Math.min(Math.max(target.scrollHeight, 24), 200);
+    target.style.height = `${newHeight}px`;
+  }
+
+  // We use the `useEffect` hook to setup the worker as soon as the `App` component is mounted.
+  useEffect(() => {
+    if (!worker.current) {
+      // Create the worker if it does not yet exist.
+      worker.current = new Worker(new URL('./worker.js', import.meta.url), {
+        type: 'module'
+      });
+    }
+
+    // Create a callback function for messages from the worker thread.
+    const onMessageReceived = (e) => {
+      switch (e.data.status) {
+        case 'loading':
+          // Model file start load: add a new progress item to the list.
+          setStatus('loading');
+          setLoadingMessage(e.data.data);
+          break;
+
+        case 'initiate':
+          setProgressItems(prev => [...prev, e.data]);
+          break;
+
+        case 'progress':
+          // Model file progress: update one of the progress items.
+          setProgressItems(
+            prev => prev.map(item => {
+              if (item.file === e.data.file) {
+                return { ...item, ...e.data }
+              }
+              return item;
+            })
+          );
+          break;
+
+        case 'done':
+          // Model file loaded: remove the progress item from the list.
+          setProgressItems(
+            prev => prev.filter(item => item.file !== e.data.file)
+          );
+          break;
+
+        case 'ready':
+          // Pipeline ready: the worker is ready to accept messages.
+          setStatus('ready');
+          break;
+
+        case 'start': {
+          // Start generation
+          setMessages(prev => [...prev, { "role": "assistant", "content": "" }]);
+        }
+          break;
+
+        case 'update': {
+          // Generation update: update the output text.
+          // Parse messages
+          const { output, tps, numTokens } = e.data;
+          setTps(tps);
+          setNumTokens(numTokens)
+          setMessages(prev => {
+            const cloned = [...prev];
+            const last = cloned.at(-1);
+            cloned[cloned.length - 1] = { ...last, content: last.content + output };
+            return cloned;
+          });
+        }
+          break;
+
+        case 'complete':
+          // Generation complete: re-enable the "Generate" button
+          setIsRunning(false);
+          break;
+      }
+    };
+
+    // Attach the callback function as an event listener.
+    worker.current.addEventListener('message', onMessageReceived);
+
+    // Define a cleanup function for when the component is unmounted.
+    return () => {
+      worker.current.removeEventListener('message', onMessageReceived);
+    };
+  }, []);
+
+  // Send the messages to the worker thread whenever the `messages` state changes.
+  useEffect(() => {
+    if (messages.filter(x => x.role === 'user').length === 0) {
+      // No user messages yet: do nothing.
+      return;
+    }
+    if (messages.at(-1).role === 'assistant') {
+      // Do not update if the last message is from the assistant
+      return;
+    }
+    setTps(null);
+    worker.current.postMessage({ type: 'generate', data: messages });
+  }, [messages, isRunning]);
+
+  useEffect(() => {
+    if (!chatContainerRef.current) return;
+    if (isRunning) {
+      const element = chatContainerRef.current;
+      if (element.scrollHeight - element.scrollTop - element.clientHeight < STICKY_SCROLL_THRESHOLD) {
+        element.scrollTop = element.scrollHeight;
+      }
+    }
+  }, [messages, isRunning]);
+
+  return (
+    IS_WEBGPU_AVAILABLE
+      ? (<div className="flex flex-col h-screen mx-auto items justify-end text-gray-800 dark:text-gray-200 bg-white dark:bg-gray-900">
+
+        {status === null && messages.length === 0 && (
+          <div className="h-full overflow-auto scrollbar-thin flex justify-center items-center flex-col relative">
+            <div className="flex flex-col items-center mb-1 max-w-[400px] text-center">
+              <img src="logo.png" width="100%" height="auto" className="block drop-shadow-md px-12"></img>
+              <h1 className="text-4xl font-bold mb-1">Moondream WebGPU</h1>
+              <h2 className="font-semibold text-lg">A private and powerful multimodal AI chatbot that runs locally in your browser.</h2>
+            </div>
+
+            <div className="flex flex-col items-center px-4">
+              <p className="max-w-[514px] mb-4">
+                <br />
+                You are about to load <a href="https://huggingface.co/Xenova/moondream2" target="_blank" rel="noreferrer" className="font-medium underline">moondream2</a>,
+                a 1.86 billion parameter VLM (Vision-Language Model) that is optimized for inference on the web. Once downloaded, the model (1.8&nbsp;GB) will be cached and reused when you revisit the page.<br />
+                <br />
+                Everything runs directly in your browser using <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noreferrer" className="underline">🤗&nbsp;Transformers.js</a> and ONNX Runtime Web, meaning your conversations aren&#39;t sent to a server. You can even disconnect from the internet after the model has loaded!
+              </p>
+
+              <button
+                className="border px-4 py-2 rounded-lg bg-blue-400 text-white hover:bg-blue-500 disabled:bg-blue-100 disabled:cursor-not-allowed select-none"
+                onClick={() => {
+                  worker.current.postMessage({ type: 'load' });
+                  setStatus('loading');
+                }}
+                disabled={status !== null}
+              >
+                Load model
+              </button>
+            </div>
+          </div>
+        )}
+        {status === 'loading' && (<>
+          <div className="w-full max-w-[500px] text-left mx-auto p-4 bottom-0 mt-auto">
+            <p className="text-center mb-1">{loadingMessage}</p>
+            {progressItems.map(({ file, progress, total }, i) => (
+              <Progress key={i} text={file} percentage={progress} total={total} />
+            ))}
+          </div>
+        </>)}
+
+        {status === 'ready' && (<div
+          ref={chatContainerRef}
+          className="overflow-y-auto scrollbar-thin w-full flex flex-col items-center h-full"
+        >
+          <Chat messages={messages} />
+          <p className="text-center text-sm min-h-6 text-gray-500 dark:text-gray-300">
+            {tps && messages.length > 0 && (<>
+              {!isRunning &&
+                <span>Generated {numTokens} tokens in {(numTokens / tps).toFixed(2)} seconds&nbsp;&#40;</span>}
+              {<>
+                <span className="font-medium text-center mr-1 text-black dark:text-white">
+                  {tps.toFixed(2)}
+                </span>
+                <span className="text-gray-500 dark:text-gray-300">tokens/second</span>
+              </>}
+              {!isRunning && <>
+                <span className="mr-1">&#41;.</span>
+                <span className="underline cursor-pointer" onClick={() => setMessages([])}>Reset</span>
+              </>}
+            </>)}
+          </p>
+        </div>)}
+
+        <div className="mt-2 border dark:bg-gray-700 rounded-lg w-[600px] max-w-[80%] max-h-[240px] mx-auto relative mb-3 flex">
+          <label
+            htmlFor="file-upload"
+            className={status === 'ready' ? "cursor-pointer" : "cursor-not-allowed pointer-events-none"}
+          >
+            <ImageIcon
+              className={`h-8 w-8 p-1 rounded-md ${status === 'ready' ? "text-gray-800 dark:text-gray-100" : "text-gray-400 dark:text-gray-500"} absolute bottom-3 left-1.5`}
+            ></ImageIcon>
+            <input ref={imageUploadRef} id="file-upload" type="file" accept="image/*" className="hidden" onInput={(e) => {
+              const file = e.target.files[0];
+              if (!file) {
+                return;
+              }
+
+              const reader = new FileReader();
+
+              // Set up a callback when the file is loaded
+              reader.onload = e2 => {
+                setImage(e2.target.result);
+                e.target.value = '';
+              };
+
+              reader.readAsDataURL(file);
+            }}></input>
+          </label>
+          <div className="w-full flex flex-col">
+            {image && (
+              <ImagePreview onRemove={() => {
+                setImage(null);
+              }} src={image} className="w-20 h-20 min-w-20 min-h-20 relative p-2" />
+            )}
+
+            <textarea
+              ref={textareaRef}
+              className="scrollbar-thin w-full pl-11 pr-12 dark:bg-gray-700 py-4 rounded-lg bg-transparent border-none outline-none text-gray-800 disabled:text-gray-400 dark:text-gray-100 placeholder-gray-500 disabled:placeholder-gray-200 dark:placeholder-gray-300 dark:disabled:placeholder-gray-500 resize-none disabled:cursor-not-allowed"
+              placeholder="Type your message..."
+              type="text"
+              rows={1}
+              value={input}
+              disabled={status !== 'ready'}
+              title={status === 'ready' ? "Model is ready" : "Model not loaded yet"}
+              onKeyDown={(e) => {
+                if (input.length > 0 && !isRunning && (e.key === "Enter" && !e.shiftKey)) {
+                  e.preventDefault(); // Prevent default behavior of Enter key
+                  onEnter(input, image);
+                }
+              }}
+              onInput={(e) => setInput(e.target.value)}
+            />
+          </div>
+
+          {isRunning
+            ? (<div className="cursor-pointer" onClick={onInterrupt}>
+              <StopIcon
+                className="h-8 w-8 p-1 rounded-md text-gray-800 dark:text-gray-100 absolute right-3 bottom-3"
+              />
+            </div>)
+            : input.length > 0
+              ? (<div className="cursor-pointer" onClick={() => onEnter(input, image)}>
+                <ArrowRightIcon
+                  className="h-8 w-8 p-1 bg-gray-800 dark:bg-gray-100 text-white dark:text-black rounded-md absolute right-3 bottom-3"
+                />
+              </div>)
+              : (<div>
+                <ArrowRightIcon
+                  className="h-8 w-8 p-1 bg-gray-200 dark:bg-gray-600 text-gray-50 dark:text-gray-800 rounded-md absolute right-3 bottom-3"
+                />
+              </div>)
+          }
+        </div>
+
+        <p className="text-xs text-gray-400 text-center mb-3">
+          Disclaimer: Generated content may be inaccurate or false.
+        </p>
+      </div>)
+      : (<div className="fixed w-screen h-screen bg-black z-10 bg-opacity-[92%] text-white text-2xl font-semibold flex justify-center items-center text-center">WebGPU is not supported<br />by this browser :&#40;</div>)
+  )
+}
+
+export default App
diff --git a/examples/webgpu-vlm/src/components/Chat.css b/examples/webgpu-vlm/src/components/Chat.css
new file mode 100644
index 000000000..f8ab98d4b
--- /dev/null
+++ b/examples/webgpu-vlm/src/components/Chat.css
@@ -0,0 +1,112 @@
+@scope (.markdown) {
+
+    /* Code blocks */
+    pre {
+        margin: 0.5rem 0;
+        white-space: break-spaces;
+    }
+
+    code {
+        padding: 0.2em 0.4em;
+        border-radius: 4px;
+        font-family: Consolas, Monaco, 'Andale Mono', 'Ubuntu Mono', monospace;
+        font-size: 0.9em;
+    }
+
+    pre,
+    code {
+        background-color: #f2f2f2;
+    }
+
+    @media (prefers-color-scheme: dark) {
+
+        pre,
+        code {
+            background-color: #333;
+        }
+
+    }
+
+    pre:has(code) {
+        padding: 1rem 0.5rem;
+    }
+
+    pre>code {
+        padding: 0;
+    }
+
+    /* Headings */
+    h1,
+    h2,
+    h3,
+    h4,
+    h5,
+    h6 {
+        font-weight: 600;
+        line-height: 1.2;
+    }
+
+    h1 {
+        font-size: 2em;
+        margin: 1rem 0;
+    }
+
+    h2 {
+        font-size: 1.5em;
+        margin: 0.83rem 0;
+    }
+
+    h3 {
+        font-size: 1.25em;
+        margin: 0.67rem 0;
+    }
+
+    h4 {
+        font-size: 1em;
+        margin: 0.5rem 0;
+    }
+
+    h5 {
+        font-size: 0.875em;
+        margin: 0.33rem 0;
+    }
+
+    h6 {
+        font-size: 0.75em;
+        margin: 0.25rem 0;
+    }
+
+    h1,
+    h2,
+    h3,
+    h4,
+    h5,
+    h6:first-child {
+        margin-top: 0;
+    }
+
+    /* Unordered List */
+    ul {
+        list-style-type: disc;
+        margin-left: 1.5rem;
+    }
+
+    /* Ordered List */
+    ol {
+        list-style-type: decimal;
+        margin-left: 1.5rem;
+    }
+
+    /* List Items */
+    li {
+        margin: 0.25rem 0;
+    }
+
+    p:not(:first-child) {
+        margin-top: 0.75rem;
+    }
+
+    p:not(:last-child) {
+        margin-bottom: 0.75rem;
+    }
+}
\ No newline at end of file
diff --git a/examples/webgpu-vlm/src/components/Chat.jsx b/examples/webgpu-vlm/src/components/Chat.jsx
new file mode 100644
index 000000000..49516896c
--- /dev/null
+++ b/examples/webgpu-vlm/src/components/Chat.jsx
@@ -0,0 +1,43 @@
+import { marked } from 'marked';
+import DOMPurify from 'dompurify';
+
+import BotIcon from './icons/BotIcon';
+import UserIcon from './icons/UserIcon';
+
+import './Chat.css';
+
+export default function Chat({ messages }) {
+    const empty = messages.length === 0;
+
+    return (<div className={`flex-1 p-6 max-w-[960px] w-full ${empty ? 'flex flex-col items-center justify-end' : 'space-y-4'}`}>
+        {empty
+            ? <div className="text-xl">Ready!</div>
+            : messages.map((msg, i) => (
+                <div key={`message-${i}`} className="flex items-start space-x-4">
+                    {msg.role === 'assistant'
+                        ? (<>
+                            <BotIcon className="h-6 w-6 min-h-6 min-w-6 my-3 text-gray-500 dark:text-gray-300" />
+                            <div className="bg-gray-200 dark:bg-gray-700 rounded-lg p-4">
+                                <p className="min-h-6 text-gray-800 dark:text-gray-200 overflow-wrap-anywhere">{
+                                    msg.content.length > 0
+                                        ? <span className="markdown" dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(marked.parse(msg.content)) }} />
+                                        : (<span className="h-6 flex items-center gap-1">
+                                            <span className="w-2.5 h-2.5 bg-gray-600 dark:bg-gray-300 rounded-full animate-pulse"></span>
+                                            <span className="w-2.5 h-2.5 bg-gray-600 dark:bg-gray-300 rounded-full animate-pulse animation-delay-200"></span>
+                                            <span className="w-2.5 h-2.5 bg-gray-600 dark:bg-gray-300 rounded-full animate-pulse animation-delay-400"></span>
+                                        </span>)
+                                }</p>
+                            </div>
+                        </>
+                        ) : (<>
+                            <UserIcon className="h-6 w-6 min-h-6 min-w-6 my-3 text-gray-500 dark:text-gray-300" />
+                            <div className="bg-blue-500 text-white rounded-lg p-4">
+                                {msg.image && <img src={msg.image} className="max-w-full max-h-64 rounded-md mb-3" />}
+                                <p className="min-h-6 overflow-wrap-anywhere">{msg.content}</p>
+                            </div>
+                        </>)
+                    }
+                </div>
+            ))}
+    </div>)
+}
diff --git a/examples/webgpu-vlm/src/components/ImagePreview.jsx b/examples/webgpu-vlm/src/components/ImagePreview.jsx
new file mode 100644
index 000000000..9e5ccc0c9
--- /dev/null
+++ b/examples/webgpu-vlm/src/components/ImagePreview.jsx
@@ -0,0 +1,16 @@
+import { useState } from "react";
+import CrossIcon from "./icons/CrossIcon"
+
+export default function ImagePreview({ src, onRemove, ...props }) {
+    const [hover, setHover] = useState(false);
+
+    return (
+        <div
+            {...props}
+            onMouseEnter={() => setHover(true)}
+            onMouseLeave={() => setHover(false)}
+        >
+            <CrossIcon onClick={onRemove} className={`absolute top-0 right-0 cursor-pointer dark:fill-gray-400 dark:text-gray-100 fill-gray-200 text-gray-800 ${hover ? '' : 'hidden'}`} />
+            <img src={src} alt="Upload preview" className="w-full h-full object-cover rounded-md" />
+        </div>)
+}
diff --git a/examples/webgpu-vlm/src/components/Progress.jsx b/examples/webgpu-vlm/src/components/Progress.jsx
new file mode 100644
index 000000000..9ce024cc8
--- /dev/null
+++ b/examples/webgpu-vlm/src/components/Progress.jsx
@@ -0,0 +1,15 @@
+function formatBytes(size) {
+    const i = size == 0 ? 0 : Math.floor(Math.log(size) / Math.log(1024));
+    return +((size / Math.pow(1024, i)).toFixed(2)) * 1 + ['B', 'kB', 'MB', 'GB', 'TB'][i];
+}
+
+export default function Progress({ text, percentage, total }) {
+    percentage ??= 0;
+    return (
+        <div className="w-full bg-gray-100 dark:bg-gray-700 text-left rounded-lg overflow-hidden mb-0.5">
+            <div className="bg-blue-400 whitespace-nowrap px-1 text-sm" style={{ width: `${percentage}%` }}>
+                {text} ({percentage.toFixed(2)}%{isNaN(total) ? '' : ` of ${formatBytes(total)}`})
+            </div>
+        </div>
+    );
+}
diff --git a/examples/webgpu-vlm/src/components/icons/ArrowRightIcon.jsx b/examples/webgpu-vlm/src/components/icons/ArrowRightIcon.jsx
new file mode 100644
index 000000000..0ca5ed917
--- /dev/null
+++ b/examples/webgpu-vlm/src/components/icons/ArrowRightIcon.jsx
@@ -0,0 +1,19 @@
+export default function ArrowRightIcon(props) {
+    return (
+        <svg
+            {...props}
+            xmlns="http://www.w3.org/2000/svg"
+            width="24"
+            height="24"
+            viewBox="0 0 24 24"
+            fill="none"
+            stroke="currentColor"
+            strokeWidth="2"
+            strokeLinecap="round"
+            strokeLinejoin="round"
+        >
+            <path d="M5 12h14" />
+            <path d="m12 5 7 7-7 7" />
+        </svg>
+    )
+}
\ No newline at end of file
diff --git a/examples/webgpu-vlm/src/components/icons/BotIcon.jsx b/examples/webgpu-vlm/src/components/icons/BotIcon.jsx
new file mode 100644
index 000000000..b8bd0ceae
--- /dev/null
+++ b/examples/webgpu-vlm/src/components/icons/BotIcon.jsx
@@ -0,0 +1,23 @@
+export default function BotIcon(props) {
+    return (
+        <svg
+            {...props}
+            xmlns="http://www.w3.org/2000/svg"
+            width="24"
+            height="24"
+            viewBox="0 0 24 24"
+            fill="none"
+            stroke="currentColor"
+            strokeWidth="2"
+            strokeLinecap="round"
+            strokeLinejoin="round"
+        >
+            <path d="M12 8V4H8" />
+            <rect width="16" height="12" x="4" y="8" rx="2" />
+            <path d="M2 14h2" />
+            <path d="M20 14h2" />
+            <path d="M15 13v2" />
+            <path d="M9 13v2" />
+        </svg>
+    )
+}
\ No newline at end of file
diff --git a/examples/webgpu-vlm/src/components/icons/CrossIcon.jsx b/examples/webgpu-vlm/src/components/icons/CrossIcon.jsx
new file mode 100644
index 000000000..d2e03d480
--- /dev/null
+++ b/examples/webgpu-vlm/src/components/icons/CrossIcon.jsx
@@ -0,0 +1,18 @@
+export default function CrossIcon(props) {
+    return (
+        <svg
+            {...props}
+            xmlns="http://www.w3.org/2000/svg"
+            width="24"
+            height="24"
+            viewBox="0 0 24 24"
+            fill="none"
+            stroke="currentColor"
+            strokeWidth="2"
+            strokeLinecap="round"
+            strokeLinejoin="round"
+        >
+            <path d="m9.75 9.75 4.5 4.5m0-4.5-4.5 4.5M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />
+        </svg>
+    )
+}
diff --git a/examples/webgpu-vlm/src/components/icons/ImageIcon.jsx b/examples/webgpu-vlm/src/components/icons/ImageIcon.jsx
new file mode 100644
index 000000000..93409108f
--- /dev/null
+++ b/examples/webgpu-vlm/src/components/icons/ImageIcon.jsx
@@ -0,0 +1,19 @@
+export default function ImageIcon(props) {
+    return (
+        <svg
+            {...props}
+            xmlns="http://www.w3.org/2000/svg"
+            width="24"
+            height="24"
+            viewBox="0 0 24 24"
+            fill="none"
+            stroke="currentColor"
+            strokeWidth="2"
+            strokeLinecap="round"
+            strokeLinejoin="round"
+        >
+            <path d="m2.25 15.75 5.159-5.159a2.25 2.25 0 0 1 3.182 0l5.159 5.159m-1.5-1.5 1.409-1.409a2.25 2.25 0 0 1 3.182 0l2.909 2.909m-18 3.75h16.5a1.5 1.5 0 0 0 1.5-1.5V6a1.5 1.5 0 0 0-1.5-1.5H3.75A1.5 1.5 0 0 0 2.25 6v12a1.5 1.5 0 0 0 1.5 1.5Zm10.5-11.25h.008v.008h-.008V8.25Zm.375 0a.375.375 0 1 1-.75 0 .375.375 0 0 1 .75 0Z" />
+        </svg>
+    )
+}
+
diff --git a/examples/webgpu-vlm/src/components/icons/StopIcon.jsx b/examples/webgpu-vlm/src/components/icons/StopIcon.jsx
new file mode 100644
index 000000000..9b97f3723
--- /dev/null
+++ b/examples/webgpu-vlm/src/components/icons/StopIcon.jsx
@@ -0,0 +1,19 @@
+export default function StopIcon(props) {
+    return (
+        <svg
+            {...props}
+            xmlns="http://www.w3.org/2000/svg"
+            width="24"
+            height="24"
+            viewBox="0 0 24 24"
+            fill="none"
+            stroke="currentColor"
+            strokeWidth="2"
+            strokeLinecap="round"
+            strokeLinejoin="round"
+        >
+            <path d="M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />
+            <path fill="currentColor" d="M9 9.563C9 9.252 9.252 9 9.563 9h4.874c.311 0 .563.252.563.563v4.874c0 .311-.252.563-.563.563H9.564A.562.562 0 0 1 9 14.437V9.564Z" />
+        </svg>
+    )
+}
\ No newline at end of file
diff --git a/examples/webgpu-vlm/src/components/icons/UserIcon.jsx b/examples/webgpu-vlm/src/components/icons/UserIcon.jsx
new file mode 100644
index 000000000..cb09e7574
--- /dev/null
+++ b/examples/webgpu-vlm/src/components/icons/UserIcon.jsx
@@ -0,0 +1,19 @@
+export default function UserIcon(props) {
+    return (
+        <svg
+            {...props}
+            xmlns="http://www.w3.org/2000/svg"
+            width="24"
+            height="24"
+            viewBox="0 0 24 24"
+            fill="none"
+            stroke="currentColor"
+            strokeWidth="2"
+            strokeLinecap="round"
+            strokeLinejoin="round"
+        >
+            <path d="M19 21v-2a4 4 0 0 0-4-4H9a4 4 0 0 0-4 4v2" />
+            <circle cx="12" cy="7" r="4" />
+        </svg>
+    )
+}
\ No newline at end of file
diff --git a/examples/webgpu-vlm/src/index.css b/examples/webgpu-vlm/src/index.css
new file mode 100644
index 000000000..8848bbd6d
--- /dev/null
+++ b/examples/webgpu-vlm/src/index.css
@@ -0,0 +1,32 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+
+@layer utilities {
+  .scrollbar-thin::-webkit-scrollbar {
+    @apply w-2;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-track {
+    @apply rounded-full bg-gray-100 dark:bg-gray-700;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-thumb {
+    @apply rounded-full bg-gray-300 dark:bg-gray-600;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-thumb:hover {
+    @apply bg-gray-500;
+  }
+
+  .animation-delay-200 {
+    animation-delay: 200ms;
+  }
+  .animation-delay-400 {
+    animation-delay: 400ms;
+  }
+
+  .overflow-wrap-anywhere {
+    overflow-wrap: anywhere;
+  }
+}
diff --git a/examples/webgpu-vlm/src/main.jsx b/examples/webgpu-vlm/src/main.jsx
new file mode 100644
index 000000000..54b39dd1d
--- /dev/null
+++ b/examples/webgpu-vlm/src/main.jsx
@@ -0,0 +1,10 @@
+import React from 'react'
+import ReactDOM from 'react-dom/client'
+import App from './App.jsx'
+import './index.css'
+
+ReactDOM.createRoot(document.getElementById('root')).render(
+  <React.StrictMode>
+    <App />
+  </React.StrictMode>,
+)
diff --git a/examples/webgpu-vlm/src/worker.js b/examples/webgpu-vlm/src/worker.js
new file mode 100644
index 000000000..d145b17a3
--- /dev/null
+++ b/examples/webgpu-vlm/src/worker.js
@@ -0,0 +1,209 @@
+
+import {
+    env,
+    AutoTokenizer,
+    Moondream1ForConditionalGeneration,
+    TextStreamer,
+    StoppingCriteria,
+    RawImage,
+    AutoProcessor,
+    Tensor,
+    full,
+} from '@xenova/transformers';
+
+const DEVICE = 'webgpu';
+const MAX_NEW_TOKENS = 256;
+
+env.backends.onnx.wasm.proxy = DEVICE !== 'webgpu';
+
+async function hasFp16() {
+    try {
+        const adapter = await navigator.gpu.requestAdapter();
+        return adapter.features.has('shader-f16');
+    } catch (e) {
+        return false;
+    }
+}
+/**
+ * This class uses the Singleton pattern to ensure that only one instance of the model is loaded.
+ */
+class TextGenerationPipeline {
+    static model_id = 'Xenova/moondream2';
+    static tokenizer = null;
+    static processor = null;
+    static model = null;
+    static supportsFp16 = null;
+
+    static async getInstance(progress_callback = null) {
+
+        this.tokenizer ??= AutoTokenizer.from_pretrained(this.model_id, {
+            progress_callback,
+        });
+
+        this.processor ??= AutoProcessor.from_pretrained(this.model_id);
+
+        // Choose the model based on whether fp16 is available
+        this.supportsFp16 ??= await hasFp16();
+        this.model ??= Moondream1ForConditionalGeneration.from_pretrained(this.model_id, {
+            dtype: {
+                embed_tokens: this.supportsFp16 ? 'fp16' : 'fp32', // or 'fp32'
+                vision_encoder: this.supportsFp16 ? 'fp16' : 'fp32', // or 'q8'
+                decoder_model_merged: 'q4', // or 'q4f16' or 'q8'
+            },
+            device: DEVICE,
+            progress_callback,
+        });
+
+        return Promise.all([this.tokenizer, this.processor, this.model]);
+    }
+}
+
+
+class CallbackTextStreamer extends TextStreamer {
+    constructor(tokenizer, cb) {
+        super(tokenizer, {
+            skip_prompt: true,
+            skip_special_tokens: true,
+        });
+        this.cb = cb;
+    }
+
+    on_finalized_text(text) {
+        this.cb(text);
+    }
+}
+
+class InterruptableStoppingCriteria extends StoppingCriteria {
+    constructor() {
+        super();
+        this.interrupted = false;
+    }
+
+    interrupt() {
+        this.interrupted = true;
+    }
+
+    reset() {
+        this.interrupted = false;
+    }
+
+    _call(input_ids, scores) {
+        return new Array(input_ids.length).fill(this.interrupted);
+    }
+}
+
+const stopping_criteria = new InterruptableStoppingCriteria();
+
+async function generate(messages) {
+
+    // Only support a single image for now
+    const images = messages.filter(x => x.image).map(x => x.image);
+    if (images.length > 1) {
+        self.postMessage({
+            status: 'error',
+            error: 'Currently, at most one image is supported.',
+        });
+        return;
+    }
+
+    // Retrieve the text-generation pipeline.
+    const [tokenizer, processor, model] = await TextGenerationPipeline.getInstance();
+
+    // Construct and tokenize prompt
+    const prompt = messages.map(x => `${x.image ? '<image>\n\n' : ''}${x.role === 'user' ? 'Question: ' : 'Answer: '}${x.content.trim()}`).join('\n\n') + '\n\nAnswer:'
+    let inputs = tokenizer(prompt);
+
+    if (images.length > 0) {
+        const image = await RawImage.fromURL(images[0]);
+        const vision_inputs = await processor(image);
+
+        inputs = { ...inputs, ...vision_inputs };
+    }
+
+    let startTime;
+    let numTokens = 0;
+    const cb = (output) => {
+        startTime ??= performance.now();
+
+        let tps;
+        if (numTokens++ > 0) {
+            tps = numTokens / (performance.now() - startTime) * 1000;
+        }
+        self.postMessage({
+            status: 'update',
+            output, tps, numTokens,
+        });
+    }
+
+    const streamer = new CallbackTextStreamer(tokenizer, cb);
+
+    // Tell the main thread we are starting
+    self.postMessage({ status: 'start' });
+
+    const outputs = await model.generate({
+        ...inputs,
+        max_new_tokens: MAX_NEW_TOKENS,
+        streamer,
+        stopping_criteria,
+    });
+    const outputText = tokenizer.batch_decode(outputs, { skip_special_tokens: false });
+
+    // Send the output back to the main thread
+    self.postMessage({
+        status: 'complete',
+        output: outputText,
+    });
+}
+
+async function load() {
+    self.postMessage({
+        status: 'loading',
+        data: 'Loading model...'
+    });
+
+    // Load the pipeline and save it for future use.
+    const [tokenizer, processor, model] = await TextGenerationPipeline.getInstance(x => {
+        // We also add a progress callback to the pipeline so that we can
+        // track model loading.
+        self.postMessage(x);
+    });
+
+    self.postMessage({
+        status: 'loading',
+        data: 'Compiling shaders and warming up model...'
+    });
+
+    // Run model with dummy input to compile shaders
+    const text_inputs = tokenizer('a');
+
+    const vision_inputs = {
+        pixel_values: full([1, 3, 378, 378], 0.0)
+    }
+
+    const inputs = { ...text_inputs, ...vision_inputs };
+    await model.generate({ ...inputs, max_new_tokens: 1 });
+    self.postMessage({ status: 'ready' });
+}
+// Listen for messages from the main thread
+self.addEventListener('message', async (e) => {
+    const { type, data } = e.data;
+
+    switch (type) {
+        case 'load':
+            load();
+            break;
+
+        case 'generate':
+            stopping_criteria.reset();
+            generate(data);
+            break;
+
+        case 'interrupt':
+            stopping_criteria.interrupt();
+            break;
+
+        case 'reset':
+            stopping_criteria.reset();
+            break;
+    }
+});
diff --git a/examples/webgpu-vlm/tailwind.config.js b/examples/webgpu-vlm/tailwind.config.js
new file mode 100644
index 000000000..d37737fc0
--- /dev/null
+++ b/examples/webgpu-vlm/tailwind.config.js
@@ -0,0 +1,12 @@
+/** @type {import('tailwindcss').Config} */
+export default {
+  content: [
+    "./index.html",
+    "./src/**/*.{js,ts,jsx,tsx}",
+  ],
+  theme: {
+    extend: {},
+  },
+  plugins: [],
+}
+
diff --git a/examples/webgpu-vlm/vite.config.js b/examples/webgpu-vlm/vite.config.js
new file mode 100644
index 000000000..5a33944a9
--- /dev/null
+++ b/examples/webgpu-vlm/vite.config.js
@@ -0,0 +1,7 @@
+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+
+// https://vitejs.dev/config/
+export default defineConfig({
+  plugins: [react()],
+})
diff --git a/examples/webgpu-whisper/.eslintrc.cjs b/examples/webgpu-whisper/.eslintrc.cjs
new file mode 100644
index 000000000..ce8fffe57
--- /dev/null
+++ b/examples/webgpu-whisper/.eslintrc.cjs
@@ -0,0 +1,21 @@
+module.exports = {
+  root: true,
+  env: { browser: true, es2020: true },
+  extends: [
+    'eslint:recommended',
+    'plugin:react/recommended',
+    'plugin:react/jsx-runtime',
+    'plugin:react-hooks/recommended',
+  ],
+  ignorePatterns: ['dist', '.eslintrc.cjs'],
+  parserOptions: { ecmaVersion: 'latest', sourceType: 'module' },
+  settings: { react: { version: '18.2' } },
+  plugins: ['react-refresh'],
+  rules: {
+    'react-refresh/only-export-components': [
+      'warn',
+      { allowConstantExport: true },
+    ],
+    'react/prop-types': 'off'
+  },
+}
diff --git a/examples/webgpu-whisper/.gitignore b/examples/webgpu-whisper/.gitignore
new file mode 100644
index 000000000..a547bf36d
--- /dev/null
+++ b/examples/webgpu-whisper/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/webgpu-whisper/README.md b/examples/webgpu-whisper/README.md
new file mode 100644
index 000000000..f768e33fc
--- /dev/null
+++ b/examples/webgpu-whisper/README.md
@@ -0,0 +1,8 @@
+# React + Vite
+
+This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
+
+Currently, two official plugins are available:
+
+- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
+- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
diff --git a/examples/webgpu-whisper/index.html b/examples/webgpu-whisper/index.html
new file mode 100644
index 000000000..da24b23cb
--- /dev/null
+++ b/examples/webgpu-whisper/index.html
@@ -0,0 +1,13 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <link rel="icon" type="image/png" href="/logo.png" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Whisper WebGPU</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.jsx"></script>
+  </body>
+</html>
diff --git a/examples/webgpu-whisper/package.json b/examples/webgpu-whisper/package.json
new file mode 100644
index 000000000..325990590
--- /dev/null
+++ b/examples/webgpu-whisper/package.json
@@ -0,0 +1,30 @@
+{
+  "name": "webgpu-whisper",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "@huggingface/transformers": "^3.0.0-alpha.18",
+    "react": "^18.2.0",
+    "react-dom": "^18.2.0"
+  },
+  "devDependencies": {
+    "@types/react": "^18.2.43",
+    "@types/react-dom": "^18.2.17",
+    "@vitejs/plugin-react": "^4.2.1",
+    "autoprefixer": "^10.4.19",
+    "eslint": "^8.55.0",
+    "eslint-plugin-react": "^7.33.2",
+    "eslint-plugin-react-hooks": "^4.6.0",
+    "eslint-plugin-react-refresh": "^0.4.5",
+    "postcss": "^8.4.38",
+    "tailwindcss": "^3.4.3",
+    "vite": "^5.2.11"
+  }
+}
diff --git a/examples/webgpu-whisper/postcss.config.js b/examples/webgpu-whisper/postcss.config.js
new file mode 100644
index 000000000..2e7af2b7f
--- /dev/null
+++ b/examples/webgpu-whisper/postcss.config.js
@@ -0,0 +1,6 @@
+export default {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+}
diff --git a/examples/webgpu-whisper/public/banner.png b/examples/webgpu-whisper/public/banner.png
new file mode 100644
index 000000000..b9b0e75f4
Binary files /dev/null and b/examples/webgpu-whisper/public/banner.png differ
diff --git a/examples/webgpu-whisper/public/logo.png b/examples/webgpu-whisper/public/logo.png
new file mode 100644
index 000000000..fc3b13f6b
Binary files /dev/null and b/examples/webgpu-whisper/public/logo.png differ
diff --git a/examples/webgpu-whisper/src/App.jsx b/examples/webgpu-whisper/src/App.jsx
new file mode 100644
index 000000000..5f74ecba8
--- /dev/null
+++ b/examples/webgpu-whisper/src/App.jsx
@@ -0,0 +1,257 @@
+import { useEffect, useState, useRef } from 'react';
+
+import { AudioVisualizer } from './components/AudioVisualizer';
+import Progress from './components/Progress';
+import { LanguageSelector } from './components/LanguageSelector';
+
+const IS_WEBGPU_AVAILABLE = !!navigator.gpu;
+
+const WHISPER_SAMPLING_RATE = 16_000;
+const MAX_AUDIO_LENGTH = 30; // seconds
+const MAX_SAMPLES = WHISPER_SAMPLING_RATE * MAX_AUDIO_LENGTH;
+
+function App() {
+
+  // Create a reference to the worker object.
+  const worker = useRef(null);
+
+  const recorderRef = useRef(null);
+
+  // Model loading and progress
+  const [status, setStatus] = useState(null);
+  const [loadingMessage, setLoadingMessage] = useState('');
+  const [progressItems, setProgressItems] = useState([]);
+
+  // Inputs and outputs
+  const [text, setText] = useState('');
+  const [tps, setTps] = useState(null);
+  const [language, setLanguage] = useState('en');
+
+  // Processing
+  const [recording, setRecording] = useState(false);
+  const [isProcessing, setIsProcessing] = useState(false);
+  const [chunks, setChunks] = useState([]);
+  const [stream, setStream] = useState(null);
+  const audioContextRef = useRef(null);
+
+  // We use the `useEffect` hook to setup the worker as soon as the `App` component is mounted.
+  useEffect(() => {
+    if (!worker.current) {
+      // Create the worker if it does not yet exist.
+      worker.current = new Worker(new URL('./worker.js', import.meta.url), {
+        type: 'module'
+      });
+    }
+
+    // Create a callback function for messages from the worker thread.
+    const onMessageReceived = (e) => {
+      switch (e.data.status) {
+        case 'loading':
+          // Model file start load: add a new progress item to the list.
+          setStatus('loading');
+          setLoadingMessage(e.data.data);
+          break;
+
+        case 'initiate':
+          setProgressItems(prev => [...prev, e.data]);
+          break;
+
+        case 'progress':
+          // Model file progress: update one of the progress items.
+          setProgressItems(
+            prev => prev.map(item => {
+              if (item.file === e.data.file) {
+                return { ...item, ...e.data }
+              }
+              return item;
+            })
+          );
+          break;
+
+        case 'done':
+          // Model file loaded: remove the progress item from the list.
+          setProgressItems(
+            prev => prev.filter(item => item.file !== e.data.file)
+          );
+          break;
+
+        case 'ready':
+          // Pipeline ready: the worker is ready to accept messages.
+          setStatus('ready');
+          recorderRef.current?.start();
+          break;
+
+        case 'start': {
+          // Start generation
+          setIsProcessing(true);
+
+          // Request new data from the recorder
+          recorderRef.current?.requestData();
+        }
+          break;
+
+        case 'update': {
+          // Generation update: update the output text.
+          const { tps } = e.data;
+          setTps(tps);
+        }
+          break;
+
+        case 'complete':
+          // Generation complete: re-enable the "Generate" button
+          setIsProcessing(false);
+          setText(e.data.output);
+          break;
+      }
+    };
+
+    // Attach the callback function as an event listener.
+    worker.current.addEventListener('message', onMessageReceived);
+
+    // Define a cleanup function for when the component is unmounted.
+    return () => {
+      worker.current.removeEventListener('message', onMessageReceived);
+    };
+  }, []);
+
+  useEffect(() => {
+    if (recorderRef.current) return; // Already set
+
+    if (navigator.mediaDevices.getUserMedia) {
+      navigator.mediaDevices.getUserMedia({ audio: true })
+        .then(stream => {
+          setStream(stream);
+
+          recorderRef.current = new MediaRecorder(stream);
+          audioContextRef.current = new AudioContext({ sampleRate: WHISPER_SAMPLING_RATE });
+
+          recorderRef.current.onstart = () => {
+            setRecording(true);
+            setChunks([]);
+          }
+          recorderRef.current.ondataavailable = (e) => {
+            if (e.data.size > 0) {
+              setChunks((prev) => [...prev, e.data]);
+            } else {
+              // Empty chunk received, so we request new data after a short timeout
+              setTimeout(() => {
+                recorderRef.current.requestData();
+              }, 25);
+            }
+          };
+
+          recorderRef.current.onstop = () => {
+            setRecording(false);
+          };
+
+        })
+        .catch(err => console.error("The following error occurred: ", err));
+    } else {
+      console.error("getUserMedia not supported on your browser!");
+    }
+
+    return () => {
+      recorderRef.current?.stop();
+      recorderRef.current = null;
+    };
+  }, []);
+
+  useEffect(() => {
+    if (!recorderRef.current) return;
+    if (!recording) return;
+    if (isProcessing) return;
+    if (status !== 'ready') return;
+
+    if (chunks.length > 0) {
+      // Generate from data
+      const blob = new Blob(chunks, { type: recorderRef.current.mimeType });
+
+      const fileReader = new FileReader();
+
+      fileReader.onloadend = async () => {
+        const arrayBuffer = fileReader.result;
+        const decoded = await audioContextRef.current.decodeAudioData(arrayBuffer);
+        let audio = decoded.getChannelData(0);
+        if (audio.length > MAX_SAMPLES) { // Get last MAX_SAMPLES
+          audio = audio.slice(-MAX_SAMPLES);
+        }
+
+        worker.current.postMessage({ type: 'generate', data: { audio, language } });
+      }
+      fileReader.readAsArrayBuffer(blob);
+    } else {
+      recorderRef.current?.requestData();
+    }
+  }, [status, recording, isProcessing, chunks, language]);
+
+  return (
+    IS_WEBGPU_AVAILABLE
+      ? (<div className="flex flex-col h-screen mx-auto justify-end text-gray-800 dark:text-gray-200 bg-white dark:bg-gray-900">
+        {(
+          <div className="h-full overflow-auto scrollbar-thin flex justify-center items-center flex-col relative">
+            <div className="flex flex-col items-center mb-1 max-w-[400px] text-center">
+              <img src="logo.png" width="50%" height="auto" className="block"></img>
+              <h1 className="text-4xl font-bold mb-1">Whisper WebGPU</h1>
+              <h2 className="text-xl font-semibold">Real-time in-browser speech recognition</h2>
+            </div>
+
+            <div className="flex flex-col items-center px-4">
+              {status === null && (<>
+                <p className="max-w-[480px] mb-4">
+                  <br />
+                  You are about to load <a href="https://huggingface.co/onnx-community/whisper-base" target="_blank" rel="noreferrer" className="font-medium underline">whisper-base</a>,
+                  a 73 million parameter speech recognition model that is optimized for inference on the web. Once downloaded, the model (~200&nbsp;MB) will be cached and reused when you revisit the page.<br />
+                  <br />
+                  Everything runs directly in your browser using <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noreferrer" className="underline">🤗&nbsp;Transformers.js</a> and ONNX Runtime Web,
+                  meaning no data is sent to a server. You can even disconnect from the internet after the model has loaded!
+                </p>
+
+                <button
+                  className="border px-4 py-2 rounded-lg bg-blue-400 text-white hover:bg-blue-500 disabled:bg-blue-100 disabled:cursor-not-allowed select-none"
+                  onClick={() => {
+                    worker.current.postMessage({ type: 'load' });
+                    setStatus('loading');
+                  }}
+                  disabled={status !== null}
+                >
+                  Load model
+                </button>
+              </>)}
+
+              <div className="w-[500px] p-2">
+                <AudioVisualizer className="w-full rounded-lg" stream={stream} />
+                {status === 'ready' && <div className="relative">
+                  <p className="w-full h-[80px] overflow-y-auto overflow-wrap-anywhere border rounded-lg p-2">{text}</p>
+                  {tps && <span className="absolute bottom-0 right-0 px-1">{tps.toFixed(2)} tok/s</span>}
+                </div>}
+
+              </div>
+              {status === 'ready' && <div className='relative w-full flex justify-center'>
+                <LanguageSelector language={language} setLanguage={(e) => {
+                  recorderRef.current?.stop();
+                  setLanguage(e);
+                  recorderRef.current?.start();
+                }} />
+                <button className="border rounded-lg px-2 absolute right-2" onClick={() => {
+                  recorderRef.current?.stop();
+                  recorderRef.current?.start();
+                }}>Reset</button>
+              </div>
+              }
+              {status === 'loading' && (
+                <div className="w-full max-w-[500px] text-left mx-auto p-4">
+                  <p className="text-center">{loadingMessage}</p>
+                  {progressItems.map(({ file, progress, total }, i) => (
+                    <Progress key={i} text={file} percentage={progress} total={total} />
+                  ))}
+                </div>
+              )}
+            </div>
+          </div>
+        )}
+      </div>)
+      : (<div className="fixed w-screen h-screen bg-black z-10 bg-opacity-[92%] text-white text-2xl font-semibold flex justify-center items-center text-center">WebGPU is not supported<br />by this browser :&#40;</div>)
+  )
+}
+
+export default App
diff --git a/examples/webgpu-whisper/src/components/AudioVisualizer.jsx b/examples/webgpu-whisper/src/components/AudioVisualizer.jsx
new file mode 100644
index 000000000..5935a3e76
--- /dev/null
+++ b/examples/webgpu-whisper/src/components/AudioVisualizer.jsx
@@ -0,0 +1,58 @@
+import { useRef, useCallback, useEffect } from "react";
+
+export function AudioVisualizer({ stream, ...props }) {
+    const canvasRef = useRef(null);
+
+    const visualize = useCallback((stream) => {
+        const audioContext = new (window.AudioContext || window.webkitAudioContext)();
+        const source = audioContext.createMediaStreamSource(stream);
+        const analyser = audioContext.createAnalyser();
+        analyser.fftSize = 2048;
+        source.connect(analyser);
+
+        const canvas = canvasRef.current;
+        const canvasCtx = canvas.getContext('2d');
+        const bufferLength = analyser.frequencyBinCount;
+        const dataArray = new Uint8Array(bufferLength);
+
+        const drawVisual = () => {
+            requestAnimationFrame(drawVisual);
+            analyser.getByteTimeDomainData(dataArray);
+
+            canvasCtx.fillStyle = 'rgb(255, 255, 255)';
+            canvasCtx.fillRect(0, 0, canvas.width, canvas.height);
+
+            canvasCtx.lineWidth = 2;
+            canvasCtx.strokeStyle = 'rgb(0, 0, 0)';
+            canvasCtx.beginPath();
+
+            const sliceWidth = canvas.width * 1.0 / bufferLength;
+
+            let x = 0;
+            for (let i = 0; i < bufferLength; ++i) {
+                const v = dataArray[i] / 128.0;
+                const y = v * canvas.height / 2;
+
+                if (i === 0) {
+                    canvasCtx.moveTo(x, y);
+                } else {
+                    canvasCtx.lineTo(x, y);
+                }
+
+                x += sliceWidth;
+            }
+
+            canvasCtx.lineTo(canvas.width, canvas.height / 2);
+            canvasCtx.stroke();
+        };
+
+        drawVisual();
+    }, []);
+
+    useEffect(() => {
+        stream && visualize(stream);
+    }, [visualize, stream]);
+    return (
+        <canvas {...props} width={720} height={240} ref={canvasRef}></canvas>
+    )
+}
diff --git a/examples/webgpu-whisper/src/components/LanguageSelector.jsx b/examples/webgpu-whisper/src/components/LanguageSelector.jsx
new file mode 100644
index 000000000..9383d640e
--- /dev/null
+++ b/examples/webgpu-whisper/src/components/LanguageSelector.jsx
@@ -0,0 +1,133 @@
+
+function titleCase(str) {
+    str = str.toLowerCase();
+    return (str.match(/\w+.?/g) || [])
+        .map((word) => {
+            return word.charAt(0).toUpperCase() + word.slice(1);
+        })
+        .join("");
+}
+
+// List of supported languages:
+// https://help.openai.com/en/articles/7031512-whisper-api-faq
+// https://github.com/openai/whisper/blob/248b6cb124225dd263bb9bd32d060b6517e067f8/whisper/tokenizer.py#L79
+const LANGUAGES = {
+    en: "english",
+    zh: "chinese",
+    de: "german",
+    es: "spanish/castilian",
+    ru: "russian",
+    ko: "korean",
+    fr: "french",
+    ja: "japanese",
+    pt: "portuguese",
+    tr: "turkish",
+    pl: "polish",
+    ca: "catalan/valencian",
+    nl: "dutch/flemish",
+    ar: "arabic",
+    sv: "swedish",
+    it: "italian",
+    id: "indonesian",
+    hi: "hindi",
+    fi: "finnish",
+    vi: "vietnamese",
+    he: "hebrew",
+    uk: "ukrainian",
+    el: "greek",
+    ms: "malay",
+    cs: "czech",
+    ro: "romanian/moldavian/moldovan",
+    da: "danish",
+    hu: "hungarian",
+    ta: "tamil",
+    no: "norwegian",
+    th: "thai",
+    ur: "urdu",
+    hr: "croatian",
+    bg: "bulgarian",
+    lt: "lithuanian",
+    la: "latin",
+    mi: "maori",
+    ml: "malayalam",
+    cy: "welsh",
+    sk: "slovak",
+    te: "telugu",
+    fa: "persian",
+    lv: "latvian",
+    bn: "bengali",
+    sr: "serbian",
+    az: "azerbaijani",
+    sl: "slovenian",
+    kn: "kannada",
+    et: "estonian",
+    mk: "macedonian",
+    br: "breton",
+    eu: "basque",
+    is: "icelandic",
+    hy: "armenian",
+    ne: "nepali",
+    mn: "mongolian",
+    bs: "bosnian",
+    kk: "kazakh",
+    sq: "albanian",
+    sw: "swahili",
+    gl: "galician",
+    mr: "marathi",
+    pa: "punjabi/panjabi",
+    si: "sinhala/sinhalese",
+    km: "khmer",
+    sn: "shona",
+    yo: "yoruba",
+    so: "somali",
+    af: "afrikaans",
+    oc: "occitan",
+    ka: "georgian",
+    be: "belarusian",
+    tg: "tajik",
+    sd: "sindhi",
+    gu: "gujarati",
+    am: "amharic",
+    yi: "yiddish",
+    lo: "lao",
+    uz: "uzbek",
+    fo: "faroese",
+    ht: "haitian creole/haitian",
+    ps: "pashto/pushto",
+    tk: "turkmen",
+    nn: "nynorsk",
+    mt: "maltese",
+    sa: "sanskrit",
+    lb: "luxembourgish/letzeburgesch",
+    my: "myanmar/burmese",
+    bo: "tibetan",
+    tl: "tagalog",
+    mg: "malagasy",
+    as: "assamese",
+    tt: "tatar",
+    haw: "hawaiian",
+    ln: "lingala",
+    ha: "hausa",
+    ba: "bashkir",
+    jw: "javanese",
+    su: "sundanese",
+};
+export function LanguageSelector({ language, setLanguage }) {
+    const handleLanguageChange = (event) => {
+        setLanguage(event.target.value);
+    };
+
+    const names = Object.values(LANGUAGES).map(titleCase);
+
+    return (
+        <select
+            className="border rounded-lg p-2 max-w-[100px]"
+            value={language} onChange={handleLanguageChange}>
+            {Object.keys(LANGUAGES).map((key, i) => (
+                <option key={key} value={key}>
+                    {names[i]}
+                </option>
+            ))}
+        </select>
+    );
+}
\ No newline at end of file
diff --git a/examples/webgpu-whisper/src/components/Progress.jsx b/examples/webgpu-whisper/src/components/Progress.jsx
new file mode 100644
index 000000000..9ce024cc8
--- /dev/null
+++ b/examples/webgpu-whisper/src/components/Progress.jsx
@@ -0,0 +1,15 @@
+function formatBytes(size) {
+    const i = size == 0 ? 0 : Math.floor(Math.log(size) / Math.log(1024));
+    return +((size / Math.pow(1024, i)).toFixed(2)) * 1 + ['B', 'kB', 'MB', 'GB', 'TB'][i];
+}
+
+export default function Progress({ text, percentage, total }) {
+    percentage ??= 0;
+    return (
+        <div className="w-full bg-gray-100 dark:bg-gray-700 text-left rounded-lg overflow-hidden mb-0.5">
+            <div className="bg-blue-400 whitespace-nowrap px-1 text-sm" style={{ width: `${percentage}%` }}>
+                {text} ({percentage.toFixed(2)}%{isNaN(total) ? '' : ` of ${formatBytes(total)}`})
+            </div>
+        </div>
+    );
+}
diff --git a/examples/webgpu-whisper/src/index.css b/examples/webgpu-whisper/src/index.css
new file mode 100644
index 000000000..8848bbd6d
--- /dev/null
+++ b/examples/webgpu-whisper/src/index.css
@@ -0,0 +1,32 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+
+@layer utilities {
+  .scrollbar-thin::-webkit-scrollbar {
+    @apply w-2;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-track {
+    @apply rounded-full bg-gray-100 dark:bg-gray-700;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-thumb {
+    @apply rounded-full bg-gray-300 dark:bg-gray-600;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-thumb:hover {
+    @apply bg-gray-500;
+  }
+
+  .animation-delay-200 {
+    animation-delay: 200ms;
+  }
+  .animation-delay-400 {
+    animation-delay: 400ms;
+  }
+
+  .overflow-wrap-anywhere {
+    overflow-wrap: anywhere;
+  }
+}
diff --git a/examples/webgpu-whisper/src/main.jsx b/examples/webgpu-whisper/src/main.jsx
new file mode 100644
index 000000000..54b39dd1d
--- /dev/null
+++ b/examples/webgpu-whisper/src/main.jsx
@@ -0,0 +1,10 @@
+import React from 'react'
+import ReactDOM from 'react-dom/client'
+import App from './App.jsx'
+import './index.css'
+
+ReactDOM.createRoot(document.getElementById('root')).render(
+  <React.StrictMode>
+    <App />
+  </React.StrictMode>,
+)
diff --git a/examples/webgpu-whisper/src/worker.js b/examples/webgpu-whisper/src/worker.js
new file mode 100644
index 000000000..12735b1cb
--- /dev/null
+++ b/examples/webgpu-whisper/src/worker.js
@@ -0,0 +1,134 @@
+
+import {
+    AutoTokenizer,
+    AutoProcessor,
+    WhisperForConditionalGeneration,
+    TextStreamer,
+    full,
+} from '@huggingface/transformers';
+
+
+const MAX_NEW_TOKENS = 64;
+
+/**
+ * This class uses the Singleton pattern to ensure that only one instance of the model is loaded.
+ */
+class AutomaticSpeechRecognitionPipeline {
+    static model_id = null;
+    static tokenizer = null;
+    static processor = null;
+    static model = null;
+
+    static async getInstance(progress_callback = null) {
+        this.model_id = 'onnx-community/whisper-base';
+
+        this.tokenizer ??= AutoTokenizer.from_pretrained(this.model_id, {
+            progress_callback,
+        });
+        this.processor ??= AutoProcessor.from_pretrained(this.model_id, {
+            progress_callback,
+        });
+
+        this.model ??= WhisperForConditionalGeneration.from_pretrained(this.model_id, {
+            dtype: {
+                encoder_model: 'fp32', // 'fp16' works too
+                decoder_model_merged: 'q4', // or 'fp32' ('fp16' is broken)
+            },
+            device: 'webgpu',
+            progress_callback,
+        });
+
+        return Promise.all([this.tokenizer, this.processor, this.model]);
+    }
+}
+
+let processing = false;
+async function generate({ audio, language }) {
+    if (processing) return;
+    processing = true;
+
+    // Tell the main thread we are starting
+    self.postMessage({ status: 'start' });
+
+    // Retrieve the text-generation pipeline.
+    const [tokenizer, processor, model] = await AutomaticSpeechRecognitionPipeline.getInstance();
+
+    let startTime;
+    let numTokens = 0;
+    const callback_function = (output) => {
+        startTime ??= performance.now();
+
+        let tps;
+        if (numTokens++ > 0) {
+            tps = numTokens / (performance.now() - startTime) * 1000;
+        }
+        self.postMessage({
+            status: 'update',
+            output, tps, numTokens,
+        });
+    }
+
+    const streamer = new TextStreamer(tokenizer, {
+        skip_prompt: true,
+        skip_special_tokens: true,
+        callback_function,
+    });
+
+    const inputs = await processor(audio);
+
+    const outputs = await model.generate({
+        ...inputs,
+        max_new_tokens: MAX_NEW_TOKENS,
+        language,
+        streamer,
+    });
+
+    const outputText = tokenizer.batch_decode(outputs, { skip_special_tokens: true });
+
+    // Send the output back to the main thread
+    self.postMessage({
+        status: 'complete',
+        output: outputText,
+    });
+    processing = false;
+}
+
+async function load() {
+    self.postMessage({
+        status: 'loading',
+        data: 'Loading model...'
+    });
+
+    // Load the pipeline and save it for future use.
+    const [tokenizer, processor, model] = await AutomaticSpeechRecognitionPipeline.getInstance(x => {
+        // We also add a progress callback to the pipeline so that we can
+        // track model loading.
+        self.postMessage(x);
+    });
+
+    self.postMessage({
+        status: 'loading',
+        data: 'Compiling shaders and warming up model...'
+    });
+
+    // Run model with dummy input to compile shaders
+    await model.generate({
+        input_features: full([1, 80, 3000], 0.0),
+        max_new_tokens: 1,
+    });
+    self.postMessage({ status: 'ready' });
+}
+// Listen for messages from the main thread
+self.addEventListener('message', async (e) => {
+    const { type, data } = e.data;
+
+    switch (type) {
+        case 'load':
+            load();
+            break;
+
+        case 'generate':
+            generate(data);
+            break;
+    }
+});
diff --git a/examples/webgpu-whisper/tailwind.config.js b/examples/webgpu-whisper/tailwind.config.js
new file mode 100644
index 000000000..d37737fc0
--- /dev/null
+++ b/examples/webgpu-whisper/tailwind.config.js
@@ -0,0 +1,12 @@
+/** @type {import('tailwindcss').Config} */
+export default {
+  content: [
+    "./index.html",
+    "./src/**/*.{js,ts,jsx,tsx}",
+  ],
+  theme: {
+    extend: {},
+  },
+  plugins: [],
+}
+
diff --git a/examples/webgpu-whisper/vite.config.js b/examples/webgpu-whisper/vite.config.js
new file mode 100644
index 000000000..5a33944a9
--- /dev/null
+++ b/examples/webgpu-whisper/vite.config.js
@@ -0,0 +1,7 @@
+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+
+// https://vitejs.dev/config/
+export default defineConfig({
+  plugins: [react()],
+})
diff --git a/examples/whisper-word-timestamps/.eslintrc.cjs b/examples/whisper-word-timestamps/.eslintrc.cjs
new file mode 100644
index 000000000..3e212e1d4
--- /dev/null
+++ b/examples/whisper-word-timestamps/.eslintrc.cjs
@@ -0,0 +1,21 @@
+module.exports = {
+  root: true,
+  env: { browser: true, es2020: true },
+  extends: [
+    'eslint:recommended',
+    'plugin:react/recommended',
+    'plugin:react/jsx-runtime',
+    'plugin:react-hooks/recommended',
+  ],
+  ignorePatterns: ['dist', '.eslintrc.cjs'],
+  parserOptions: { ecmaVersion: 'latest', sourceType: 'module' },
+  settings: { react: { version: '18.2' } },
+  plugins: ['react-refresh'],
+  rules: {
+    'react/jsx-no-target-blank': 'off',
+    'react-refresh/only-export-components': [
+      'warn',
+      { allowConstantExport: true },
+    ],
+  },
+}
diff --git a/examples/whisper-word-timestamps/.gitignore b/examples/whisper-word-timestamps/.gitignore
new file mode 100644
index 000000000..a547bf36d
--- /dev/null
+++ b/examples/whisper-word-timestamps/.gitignore
@@ -0,0 +1,24 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+pnpm-debug.log*
+lerna-debug.log*
+
+node_modules
+dist
+dist-ssr
+*.local
+
+# Editor directories and files
+.vscode/*
+!.vscode/extensions.json
+.idea
+.DS_Store
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw?
diff --git a/examples/whisper-word-timestamps/README.md b/examples/whisper-word-timestamps/README.md
new file mode 100644
index 000000000..f768e33fc
--- /dev/null
+++ b/examples/whisper-word-timestamps/README.md
@@ -0,0 +1,8 @@
+# React + Vite
+
+This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
+
+Currently, two official plugins are available:
+
+- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react/README.md) uses [Babel](https://babeljs.io/) for Fast Refresh
+- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
diff --git a/examples/whisper-word-timestamps/index.html b/examples/whisper-word-timestamps/index.html
new file mode 100644
index 000000000..5f620b00e
--- /dev/null
+++ b/examples/whisper-word-timestamps/index.html
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Whisper Timestamped</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.jsx"></script>
+  </body>
+</html>
diff --git a/examples/whisper-word-timestamps/package.json b/examples/whisper-word-timestamps/package.json
new file mode 100644
index 000000000..3af99d9ef
--- /dev/null
+++ b/examples/whisper-word-timestamps/package.json
@@ -0,0 +1,30 @@
+{
+  "name": "whisper-word-timestamps",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "@xenova/transformers": "github:xenova/transformers.js#v3",
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1"
+  },
+  "devDependencies": {
+    "@types/react": "^18.3.3",
+    "@types/react-dom": "^18.3.0",
+    "@vitejs/plugin-react": "^4.3.1",
+    "autoprefixer": "^10.4.19",
+    "eslint": "^8.57.0",
+    "eslint-plugin-react": "^7.34.2",
+    "eslint-plugin-react-hooks": "^4.6.2",
+    "eslint-plugin-react-refresh": "^0.4.7",
+    "postcss": "^8.4.38",
+    "tailwindcss": "^3.4.4",
+    "vite": "^5.3.1"
+  }
+}
diff --git a/examples/whisper-word-timestamps/postcss.config.js b/examples/whisper-word-timestamps/postcss.config.js
new file mode 100644
index 000000000..2e7af2b7f
--- /dev/null
+++ b/examples/whisper-word-timestamps/postcss.config.js
@@ -0,0 +1,6 @@
+export default {
+  plugins: {
+    tailwindcss: {},
+    autoprefixer: {},
+  },
+}
diff --git a/examples/whisper-word-timestamps/src/App.jsx b/examples/whisper-word-timestamps/src/App.jsx
new file mode 100644
index 000000000..c7b6e89fc
--- /dev/null
+++ b/examples/whisper-word-timestamps/src/App.jsx
@@ -0,0 +1,217 @@
+import { useEffect, useState, useRef, useCallback } from 'react';
+
+import Progress from './components/Progress';
+import MediaInput from './components/MediaInput';
+import Transcript from './components/Transcript';
+import LanguageSelector from './components/LanguageSelector';
+
+
+async function hasWebGPU() {
+  if (!navigator.gpu) {
+    return false;
+  }
+  try {
+    const adapter = await navigator.gpu.requestAdapter();
+    return !!adapter;
+  } catch (e) {
+    return false;
+  }
+}
+
+function App() {
+
+  // Create a reference to the worker object.
+  const worker = useRef(null);
+
+  // Model loading and progress
+  const [status, setStatus] = useState(null);
+  const [loadingMessage, setLoadingMessage] = useState('');
+  const [progressItems, setProgressItems] = useState([]);
+
+  const mediaInputRef = useRef(null);
+  const [audio, setAudio] = useState(null);
+  const [language, setLanguage] = useState('en');
+
+  const [result, setResult] = useState(null);
+  const [time, setTime] = useState(null);
+  const [currentTime, setCurrentTime] = useState(0);
+
+  const [device, setDevice] = useState('webgpu'); // Try use WebGPU first
+  const [modelSize, setModelSize] = useState('gpu' in navigator ? 196 : 77); // WebGPU=196MB, WebAssembly=77MB
+  useEffect(() => {
+    hasWebGPU().then((result) => {
+      setModelSize(result ? 196 : 77);
+      setDevice(result ? 'webgpu' : 'wasm');
+    });
+  }, []);
+
+  // We use the `useEffect` hook to setup the worker as soon as the `App` component is mounted.
+  useEffect(() => {
+    if (!worker.current) {
+      // Create the worker if it does not yet exist.
+      worker.current = new Worker(new URL('./worker.js', import.meta.url), {
+        type: 'module'
+      });
+    }
+
+    // Create a callback function for messages from the worker thread.
+    const onMessageReceived = (e) => {
+      switch (e.data.status) {
+        case 'loading':
+          // Model file start load: add a new progress item to the list.
+          setStatus('loading');
+          setLoadingMessage(e.data.data);
+          break;
+
+        case 'initiate':
+          setProgressItems(prev => [...prev, e.data]);
+          break;
+
+        case 'progress':
+          // Model file progress: update one of the progress items.
+          setProgressItems(
+            prev => prev.map(item => {
+              if (item.file === e.data.file) {
+                return { ...item, ...e.data }
+              }
+              return item;
+            })
+          );
+          break;
+
+        case 'done':
+          // Model file loaded: remove the progress item from the list.
+          setProgressItems(
+            prev => prev.filter(item => item.file !== e.data.file)
+          );
+          break;
+
+        case 'ready':
+          // Pipeline ready: the worker is ready to accept messages.
+          setStatus('ready');
+          break;
+
+        case 'complete':
+          setResult(e.data.result);
+          setTime(e.data.time);
+          setStatus('ready');
+          break;
+      }
+    };
+
+    // Attach the callback function as an event listener.
+    worker.current.addEventListener('message', onMessageReceived);
+
+    // Define a cleanup function for when the component is unmounted.
+    return () => {
+      worker.current.removeEventListener('message', onMessageReceived);
+    };
+  }, []);
+
+  const handleClick = useCallback(() => {
+    setResult(null);
+    setTime(null);
+    if (status === null) {
+      setStatus('loading');
+      worker.current.postMessage({ type: 'load', data: { device } });
+    } else {
+      setStatus('running');
+      worker.current.postMessage({
+        type: 'run', data: { audio, language }
+      });
+    }
+  }, [status, audio, language, device]);
+
+  return (
+    <div className="flex flex-col h-screen mx-auto items justify-end text-gray-800 dark:text-gray-200 bg-white dark:bg-gray-900 max-w-[560px]">
+
+      {status === 'loading' && (
+        <div className="flex justify-center items-center fixed w-screen h-screen bg-black z-10 bg-opacity-[92%] top-0 left-0">
+          <div className="w-[500px]">
+            <p className="text-center mb-1 text-white text-md">{loadingMessage}</p>
+            {progressItems.map(({ file, progress, total }, i) => (
+              <Progress key={i} text={file} percentage={progress} total={total} />
+            ))}
+          </div>
+        </div>
+      )}
+      <div className="h-full flex justify-center items-center flex-col relative">
+        <div className="flex flex-col items-center mb-1 text-center">
+          <h1 className="text-5xl font-bold mb-2">Whisper Timestamped</h1>
+          <h2 className="text-xl font-semibold">In-browser speech recognition w/ word-level timestamps</h2>
+        </div>
+
+        <div className="w-full min-h-[220px] flex flex-col justify-center items-center p-2">
+          {
+            !audio && (
+              <p className="mb-2">
+                You are about to download <a href="https://huggingface.co/onnx-community/whisper-base_timestamped" target="_blank" rel="noreferrer" className="font-medium underline">whisper-base (timestamped)</a>,
+                a 73 million parameter speech recognition model with the ability to generate word-level timestamps across 100 different languages.
+                Once loaded, the model ({modelSize}&nbsp;MB) will be cached and reused when you revisit the page.<br />
+                <br />
+                Everything runs locally in your browser using <a href="https://huggingface.co/docs/transformers.js" target="_blank" rel="noreferrer" className="underline">🤗&nbsp;Transformers.js</a> and ONNX Runtime Web,
+                meaning no API calls are made to a server for inference. You can even disconnect from the internet after the model has loaded!
+              </p>
+            )
+          }
+
+          <div className="flex flex-col w-full m-3">
+            <span className="text-sm mb-0.5">Input audio/video</span>
+            <MediaInput
+              ref={mediaInputRef}
+              className="flex items-center border rounded-md cursor-pointer min-h-[100px] max-h-[500px] overflow-hidden"
+              onInputChange={(result) => setAudio(result)}
+              onTimeUpdate={(time) => setCurrentTime(time)}
+            />
+          </div>
+
+          <div className="relative w-full flex justify-center items-center">
+            <button
+              className="border px-4 py-2 rounded-lg bg-blue-400 text-white hover:bg-blue-500 disabled:bg-blue-100 disabled:cursor-not-allowed select-none"
+              onClick={handleClick}
+              disabled={status === 'running' || (status !== null && audio === null)}
+            >
+              {status === null ? 'Load model' :
+                status === 'running'
+                  ? 'Running...'
+                  : 'Run model'
+              }
+            </button>
+
+            {status !== null &&
+              <div className='absolute right-0 bottom-0'>
+                <span className="text-xs">Language:</span>
+                <br />
+                <LanguageSelector className="border rounded-lg p-1 max-w-[100px]" language={language} setLanguage={setLanguage} />
+              </div>
+            }
+          </div>
+
+          {
+            result && time && (
+              <>
+                <div className="w-full mt-4 border rounded-md">
+                  <Transcript
+                    className="p-2 max-h-[200px] overflow-y-auto scrollbar-thin select-none"
+                    transcript={result}
+                    currentTime={currentTime}
+                    setCurrentTime={(time) => {
+                      setCurrentTime(time);
+                      mediaInputRef.current.setMediaTime(time);
+                    }}
+                  />
+                </div>
+                <p className="text-sm text-gray-600 text-end p-1">Generation time: <span className="text-gray-800 font-semibold">{time.toFixed(2)}ms</span></p>
+              </>
+            )
+
+
+          }
+        </div>
+      </div>
+
+    </div >
+  )
+}
+
+export default App
diff --git a/examples/whisper-word-timestamps/src/components/LanguageSelector.jsx b/examples/whisper-word-timestamps/src/components/LanguageSelector.jsx
new file mode 100644
index 000000000..74c02a62a
--- /dev/null
+++ b/examples/whisper-word-timestamps/src/components/LanguageSelector.jsx
@@ -0,0 +1,134 @@
+
+function titleCase(str) {
+    str = str.toLowerCase();
+    return (str.match(/\w+.?/g) || [])
+        .map((word) => {
+            return word.charAt(0).toUpperCase() + word.slice(1);
+        })
+        .join("");
+}
+
+// List of supported languages:
+// https://help.openai.com/en/articles/7031512-whisper-api-faq
+// https://github.com/openai/whisper/blob/248b6cb124225dd263bb9bd32d060b6517e067f8/whisper/tokenizer.py#L79
+const LANGUAGES = {
+    en: "english",
+    zh: "chinese",
+    de: "german",
+    es: "spanish/castilian",
+    ru: "russian",
+    ko: "korean",
+    fr: "french",
+    ja: "japanese",
+    pt: "portuguese",
+    tr: "turkish",
+    pl: "polish",
+    ca: "catalan/valencian",
+    nl: "dutch/flemish",
+    ar: "arabic",
+    sv: "swedish",
+    it: "italian",
+    id: "indonesian",
+    hi: "hindi",
+    fi: "finnish",
+    vi: "vietnamese",
+    he: "hebrew",
+    uk: "ukrainian",
+    el: "greek",
+    ms: "malay",
+    cs: "czech",
+    ro: "romanian/moldavian/moldovan",
+    da: "danish",
+    hu: "hungarian",
+    ta: "tamil",
+    no: "norwegian",
+    th: "thai",
+    ur: "urdu",
+    hr: "croatian",
+    bg: "bulgarian",
+    lt: "lithuanian",
+    la: "latin",
+    mi: "maori",
+    ml: "malayalam",
+    cy: "welsh",
+    sk: "slovak",
+    te: "telugu",
+    fa: "persian",
+    lv: "latvian",
+    bn: "bengali",
+    sr: "serbian",
+    az: "azerbaijani",
+    sl: "slovenian",
+    kn: "kannada",
+    et: "estonian",
+    mk: "macedonian",
+    br: "breton",
+    eu: "basque",
+    is: "icelandic",
+    hy: "armenian",
+    ne: "nepali",
+    mn: "mongolian",
+    bs: "bosnian",
+    kk: "kazakh",
+    sq: "albanian",
+    sw: "swahili",
+    gl: "galician",
+    mr: "marathi",
+    pa: "punjabi/panjabi",
+    si: "sinhala/sinhalese",
+    km: "khmer",
+    sn: "shona",
+    yo: "yoruba",
+    so: "somali",
+    af: "afrikaans",
+    oc: "occitan",
+    ka: "georgian",
+    be: "belarusian",
+    tg: "tajik",
+    sd: "sindhi",
+    gu: "gujarati",
+    am: "amharic",
+    yi: "yiddish",
+    lo: "lao",
+    uz: "uzbek",
+    fo: "faroese",
+    ht: "haitian creole/haitian",
+    ps: "pashto/pushto",
+    tk: "turkmen",
+    nn: "nynorsk",
+    mt: "maltese",
+    sa: "sanskrit",
+    lb: "luxembourgish/letzeburgesch",
+    my: "myanmar/burmese",
+    bo: "tibetan",
+    tl: "tagalog",
+    mg: "malagasy",
+    as: "assamese",
+    tt: "tatar",
+    haw: "hawaiian",
+    ln: "lingala",
+    ha: "hausa",
+    ba: "bashkir",
+    jw: "javanese",
+    su: "sundanese",
+};
+function LanguageSelector({ language, setLanguage, ...props }) {
+    const handleLanguageChange = (event) => {
+        setLanguage(event.target.value);
+    };
+
+    const names = Object.values(LANGUAGES).map(titleCase);
+
+    return (
+        <select
+            {...props}
+            value={language} onChange={handleLanguageChange}>
+            {Object.keys(LANGUAGES).map((key, i) => (
+                <option key={key} value={key}>
+                    {names[i]}
+                </option>
+            ))}
+        </select>
+    );
+}
+export default LanguageSelector
diff --git a/examples/whisper-word-timestamps/src/components/MediaInput.jsx b/examples/whisper-word-timestamps/src/components/MediaInput.jsx
new file mode 100644
index 000000000..4bf7afcb6
--- /dev/null
+++ b/examples/whisper-word-timestamps/src/components/MediaInput.jsx
@@ -0,0 +1,194 @@
+import { useState, forwardRef, useRef, useImperativeHandle, useEffect, useCallback } from 'react';
+
+const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/whisper-timestamps-demo.mp4';
+
+const MediaInput = forwardRef(({ onInputChange, onTimeUpdate, ...props }, ref) => {
+    // UI states
+    const [dragging, setDragging] = useState(false);
+    const fileInputRef = useRef(null);
+
+    // Create a reference to the audio and video elements
+    const audioElement = useRef(null);
+    const videoElement = useRef(null);
+
+    const currentTimeRef = useRef(0);
+    useImperativeHandle(ref, () => ({
+        setMediaTime(time) {
+            if (audioElement.current?.src) {
+                audioElement.current.currentTime = time;
+            } else if (videoElement.current?.src) {
+                videoElement.current.currentTime = time;
+            }
+            currentTimeRef.current = time;
+        }
+    }));
+
+    const onBufferLoad = (arrayBuffer, type) => {
+        const blob = new Blob([arrayBuffer.slice(0)], { type: type });
+        const url = URL.createObjectURL(blob);
+        processFile(arrayBuffer);
+
+        // Create a URL for the Blob
+        if (type.startsWith('audio/')) {
+            // Dispose the previous source
+            videoElement.current.pause();
+            videoElement.current.removeAttribute('src');
+            videoElement.current.load();
+
+            audioElement.current.src = url;
+        } else if (type.startsWith('video/')) {
+            // Dispose the previous source
+            audioElement.current.pause();
+            audioElement.current.removeAttribute('src');
+            audioElement.current.load();
+
+            videoElement.current.src = url;
+        } else {
+            alert(`Unsupported file type: ${type}`);
+        }
+    }
+
+    const readFile = (file) => {
+        if (!file) return;
+
+        // file.type
+        const reader = new FileReader();
+        reader.onload = (e) => {
+            onBufferLoad(e.target.result, file.type);
+        }
+        reader.readAsArrayBuffer(file);
+    }
+
+    const handleInputChange = (event) => {
+        readFile(event.target.files[0]);
+    };
+
+    const handleDragOver = (event) => {
+        event.preventDefault();
+    };
+
+    const handleDrop = (event) => {
+        event.preventDefault();
+        setDragging(false);
+        readFile(event.dataTransfer.files[0]);
+    };
+
+    const handleClick = (e) => {
+        if (e.target.tagName === 'VIDEO' || e.target.tagName === 'AUDIO') {
+            e.preventDefault();
+            fileInputRef.current.click();
+        } else if (e.target.tagName === 'INPUT') {
+            e.stopPropagation();
+        } else {
+            fileInputRef.current.click();
+            e.stopPropagation();
+        }
+    };
+
+    const processFile = async (buffer) => {
+        const audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16_000 });
+
+        try {
+            const audioBuffer = await audioContext.decodeAudioData(buffer);
+            let audio;
+            if (audioBuffer.numberOfChannels === 2) {
+                // Merge channels
+                const SCALING_FACTOR = Math.sqrt(2);
+                const left = audioBuffer.getChannelData(0);
+                const right = audioBuffer.getChannelData(1);
+                audio = new Float32Array(left.length);
+                for (let i = 0; i < audioBuffer.length; ++i) {
+                    audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2;
+                }
+            } else {
+                audio = audioBuffer.getChannelData(0);
+            }
+            onInputChange(audio);
+
+        } catch (e) {
+            alert(e);
+        }
+    };
+
+    const requestRef = useRef();
+
+    const updateTime = useCallback(() => {
+        let elem;
+        if (audioElement.current?.src) {
+            elem = audioElement.current;
+
+        } else if (videoElement.current?.src) {
+            elem = videoElement.current;
+        }
+
+        if (elem && currentTimeRef.current !== elem.currentTime) {
+            currentTimeRef.current = elem.currentTime;
+            onTimeUpdate(elem.currentTime);
+        }
+
+        // Request the next frame
+        requestRef.current = requestAnimationFrame(updateTime);
+    }, [onTimeUpdate]);
+
+    useEffect(() => {
+        // Start the animation
+        requestRef.current = requestAnimationFrame(updateTime);
+
+        return () => {
+            // Cleanup on component unmount
+            cancelAnimationFrame(requestRef.current);
+        };
+    }, [updateTime]);
+    return (
+        <div
+            {...props}
+            onClick={handleClick}
+            onDragOver={handleDragOver}
+            onDrop={handleDrop}
+            onDragEnter={(e) => setDragging(true)}
+            onDragLeave={(e) => setDragging(false)}
+        >
+            <input
+                type="file"
+                accept="audio/*,video/*"
+                onChange={handleInputChange}
+                ref={fileInputRef}
+                className="hidden"
+            />
+            {
+                <audio
+                    ref={audioElement}
+                    controls
+                    style={{ display: audioElement.current?.src ? 'block' : 'none' }}
+                    className='w-full max-h-full'
+                />
+            }
+            {
+                <video
+                    ref={videoElement}
+                    controls
+                    style={{ display: videoElement.current?.src ? 'block' : 'none' }}
+                    className='w-full max-h-full'
+                />
+            }
+            {
+                !audioElement.current?.src && !videoElement.current?.src && (
+                    <div className="w-full flex flex-col items-center justify-center border-2 border-dashed border-gray-300 rounded-md h-[250px]"
+                        style={{ borderColor: dragging ? 'blue' : 'lightgray' }}
+                    >
+                        <span className="text-gray-600 text-center"><u>Drag & drop</u> or <u>click</u><br />to select media</span>
+                        <span className="text-gray-500 text-sm hover:text-gray-800 mt-2" onClick={async (e) => {
+                            e.stopPropagation();
+                            const buffer = await fetch(EXAMPLE_URL).then((r) => r.arrayBuffer());
+                            videoElement.current.src = URL.createObjectURL(new Blob([buffer], { type: 'video/mp4' }));
+                            onBufferLoad(buffer, 'video/mp4');
+                        }}>(or <u>try an example</u>)</span>
+                    </div>
+                )
+            }
+        </div>
+    );
+});
+MediaInput.displayName = 'MediaInput';
+
+export default MediaInput;
diff --git a/examples/whisper-word-timestamps/src/components/Progress.jsx b/examples/whisper-word-timestamps/src/components/Progress.jsx
new file mode 100644
index 000000000..9ce024cc8
--- /dev/null
+++ b/examples/whisper-word-timestamps/src/components/Progress.jsx
@@ -0,0 +1,15 @@
+function formatBytes(size) {
+    const i = size == 0 ? 0 : Math.floor(Math.log(size) / Math.log(1024));
+    return +((size / Math.pow(1024, i)).toFixed(2)) * 1 + ['B', 'kB', 'MB', 'GB', 'TB'][i];
+}
+
+export default function Progress({ text, percentage, total }) {
+    percentage ??= 0;
+    return (
+        <div className="w-full bg-gray-100 dark:bg-gray-700 text-left rounded-lg overflow-hidden mb-0.5">
+            <div className="bg-blue-400 whitespace-nowrap px-1 text-sm" style={{ width: `${percentage}%` }}>
+                {text} ({percentage.toFixed(2)}%{isNaN(total) ? '' : ` of ${formatBytes(total)}`})
+            </div>
+        </div>
+    );
+}
diff --git a/examples/whisper-word-timestamps/src/components/Transcript.jsx b/examples/whisper-word-timestamps/src/components/Transcript.jsx
new file mode 100644
index 000000000..542014323
--- /dev/null
+++ b/examples/whisper-word-timestamps/src/components/Transcript.jsx
@@ -0,0 +1,68 @@
+import { useMemo } from "react";
+
+const Chunk = ({ chunk, currentTime, onClick, ...props }) => {
+    const { text, timestamp } = chunk;
+    const [start, end] = timestamp;
+
+    const bolded = start <= currentTime && currentTime < end;
+
+    return (
+        <span {...props}>
+            {text.startsWith(' ') ? " " : ""}
+            <span
+                onClick={onClick}
+                className="text-md text-gray-600 cursor-pointer hover:text-red-600"
+                title={timestamp.map(x => x.toFixed(2)).join(' → ')}
+                style={{
+                    textDecoration: bolded ? 'underline' : 'none',
+                    textShadow: bolded ? '0 0 1px #000' : 'none',
+                }}
+            >{text.trim()}</span>
+        </span>
+    )
+}
+
+const Transcript = ({ transcript, currentTime, setCurrentTime, ...props }) => {
+
+
+    const jsonTranscript = useMemo(() => {
+        return JSON.stringify(transcript, null, 2)
+            // post-process the JSON to make it more readable
+            .replace(/( {4}"timestamp": )\[\s+(\S+)\s+(\S+)\s+\]/gm, "$1[$2 $3]");
+    }, [transcript]);
+
+    const downloadTranscript = () => {
+        const blob = new Blob([jsonTranscript], { type: 'application/json' });
+        const url = URL.createObjectURL(blob);
+        const a = document.createElement('a');
+        a.href = url;
+        a.download = 'transcript.json';
+        a.click();
+        URL.revokeObjectURL(url);
+    }
+
+    return (<>
+        <div {...props}>
+            {
+                transcript.chunks.map((chunk, i) => <Chunk key={i} chunk={chunk} currentTime={currentTime} onClick={e => {
+                    setCurrentTime(chunk.timestamp[0]) // Set to start of chunk
+                }} />)
+            }
+        </div>
+
+        <div className="flex justify-center border-t text-sm text-gray-600 max-h-[150px] overflow-y-auto p-2 scrollbar-thin">
+            <button
+                className="flex items-center border px-2 py-1 rounded-lg bg-green-400 text-white hover:bg-green-500"
+                onClick={downloadTranscript}
+            >
+                <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" strokeWidth={1.5} stroke="currentColor" className="size-6 mr-1">
+                    <path strokeLinecap="round" strokeLinejoin="round" d="M3 16.5v2.25A2.25 2.25 0 0 0 5.25 21h13.5A2.25 2.25 0 0 0 21 18.75V16.5M16.5 12 12 16.5m0 0L7.5 12m4.5 4.5V3" />
+                </svg>
+                Download transcript
+            </button>
+        </div>
+
+
+    </>)
+};
+export default Transcript;
diff --git a/examples/whisper-word-timestamps/src/index.css b/examples/whisper-word-timestamps/src/index.css
new file mode 100644
index 000000000..87bbb9dac
--- /dev/null
+++ b/examples/whisper-word-timestamps/src/index.css
@@ -0,0 +1,25 @@
+@tailwind base;
+@tailwind components;
+@tailwind utilities;
+
+@layer utilities {
+  .scrollbar-thin::-webkit-scrollbar {
+    @apply w-2;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-track {
+    @apply rounded-full bg-gray-100 dark:bg-gray-700;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-thumb {
+    @apply rounded-full bg-gray-300 dark:bg-gray-600;
+  }
+
+  .scrollbar-thin::-webkit-scrollbar-thumb:hover {
+    @apply bg-gray-500;
+  }
+}
+
+html {
+  @apply scrollbar-thin;
+}
\ No newline at end of file
diff --git a/examples/whisper-word-timestamps/src/main.jsx b/examples/whisper-word-timestamps/src/main.jsx
new file mode 100644
index 000000000..54b39dd1d
--- /dev/null
+++ b/examples/whisper-word-timestamps/src/main.jsx
@@ -0,0 +1,10 @@
+import React from 'react'
+import ReactDOM from 'react-dom/client'
+import App from './App.jsx'
+import './index.css'
+
+ReactDOM.createRoot(document.getElementById('root')).render(
+  <React.StrictMode>
+    <App />
+  </React.StrictMode>,
+)
diff --git a/examples/whisper-word-timestamps/src/worker.js b/examples/whisper-word-timestamps/src/worker.js
new file mode 100644
index 000000000..efa029a97
--- /dev/null
+++ b/examples/whisper-word-timestamps/src/worker.js
@@ -0,0 +1,94 @@
+
+import { pipeline } from '@xenova/transformers';
+
+const PER_DEVICE_CONFIG = {
+    webgpu: {
+        dtype: {
+            encoder_model: 'fp32',
+            decoder_model_merged: 'q4',
+        },
+        device: 'webgpu',
+    },
+    wasm: {
+        dtype: 'q8',
+        device: 'wasm',
+    },
+};
+
+/**
+ * This class uses the Singleton pattern to ensure that only one instance of the model is loaded.
+ */
+class PipelineSingeton {
+    static model_id = 'onnx-community/whisper-base_timestamped';
+    static instance = null;
+
+    static async getInstance(progress_callback = null, device = 'webgpu') {
+
+        if (!this.instance) {
+            this.instance = pipeline('automatic-speech-recognition', this.model_id, {
+                ...PER_DEVICE_CONFIG[device],
+                progress_callback,
+            });
+        }
+        return this.instance;
+    }
+}
+
+async function load({ device }) {
+    self.postMessage({
+        status: 'loading',
+        data: `Loading model (${device})...`
+    });
+
+    // Load the pipeline and save it for future use.
+    const transcriber = await PipelineSingeton.getInstance(x => {
+        // We also add a progress callback to the pipeline so that we can
+        // track model loading.
+        self.postMessage(x);
+    }, device);
+
+    if (device === 'webgpu') {
+        self.postMessage({
+            status: 'loading',
+            data: 'Compiling shaders and warming up model...'
+        });
+
+        await transcriber(new Float32Array(16_000), {
+            language: 'en',
+        });
+    }
+
+    self.postMessage({ status: 'ready' });
+}
+
+async function run({ audio, language }) {
+    const transcriber = await PipelineSingeton.getInstance();
+
+    // Read and preprocess image
+    const start = performance.now();
+
+    const result = await transcriber(audio, {
+        language,
+        return_timestamps: 'word',
+        chunk_length_s: 30,
+    });
+
+    const end = performance.now();
+
+    self.postMessage({ status: 'complete', result, time: end - start });
+}
+
+// Listen for messages from the main thread
+self.addEventListener('message', async (e) => {
+    const { type, data } = e.data;
+
+    switch (type) {
+        case 'load':
+            load(data);
+            break;
+
+        case 'run':
+            run(data);
+            break;
+    }
+});
diff --git a/examples/whisper-word-timestamps/tailwind.config.js b/examples/whisper-word-timestamps/tailwind.config.js
new file mode 100644
index 000000000..d37737fc0
--- /dev/null
+++ b/examples/whisper-word-timestamps/tailwind.config.js
@@ -0,0 +1,12 @@
+/** @type {import('tailwindcss').Config} */
+export default {
+  content: [
+    "./index.html",
+    "./src/**/*.{js,ts,jsx,tsx}",
+  ],
+  theme: {
+    extend: {},
+  },
+  plugins: [],
+}
+
diff --git a/examples/whisper-word-timestamps/vite.config.js b/examples/whisper-word-timestamps/vite.config.js
new file mode 100644
index 000000000..5a33944a9
--- /dev/null
+++ b/examples/whisper-word-timestamps/vite.config.js
@@ -0,0 +1,7 @@
+import { defineConfig } from 'vite'
+import react from '@vitejs/plugin-react'
+
+// https://vitejs.dev/config/
+export default defineConfig({
+  plugins: [react()],
+})
diff --git a/jest.config.mjs b/jest.config.mjs
index b6a5fdb3d..0d15ce842 100644
--- a/jest.config.mjs
+++ b/jest.config.mjs
@@ -23,9 +23,10 @@ export default {
   coverageDirectory: "coverage",
 
   // An array of regexp pattern strings used to skip coverage collection
-  // coveragePathIgnorePatterns: [
-  //   "\\\\node_modules\\\\"
-  // ],
+  coveragePathIgnorePatterns: [
+    "node_modules",
+    "tests",
+  ],
 
   // Indicates which provider should be used to instrument code for coverage
   coverageProvider: "v8",
@@ -121,9 +122,7 @@ export default {
   // rootDir: undefined,
 
   // A list of paths to directories that Jest should use to search for files in
-  roots: [
-    "./tests/"
-  ],
+  roots: ["./tests/"],
 
   // Allows you to use a custom runner instead of Jest's default test runner
   // runner: "jest-runner",
@@ -170,7 +169,7 @@ export default {
   // testRunner: "jest-circus/runner",
 
   // A map from regular expressions to paths to transformers
-  transform: {}
+  transform: {},
 
   // An array of regexp pattern strings that are matched against all source file paths, matched files will skip transformation
   // transformIgnorePatterns: [
diff --git a/jsconfig.json b/jsconfig.json
index 5430d98f2..9af7d54be 100644
--- a/jsconfig.json
+++ b/jsconfig.json
@@ -1,18 +1,14 @@
 {
-    // Only include files in the src directory
-    "include": [
-        "src/**/*"
-    ],
-    "compilerOptions": {
-        // Tells the compiler to check JS files
-        "checkJs": true,
-        "target": "esnext",
-        "module": "esnext",
-        "moduleResolution": "nodenext",
-    },
-    "typeAcquisition": {
-        "include": [
-            "jest"
-        ]
-    }
-}
\ No newline at end of file
+  // Only include files in the src directory
+  "include": ["src/**/*"],
+  "compilerOptions": {
+    // Tells the compiler to check JS files
+    "checkJs": true,
+    "target": "esnext",
+    "module": "nodenext",
+    "moduleResolution": "nodenext"
+  },
+  "typeAcquisition": {
+    "include": ["jest"]
+  }
+}
diff --git a/package-lock.json b/package-lock.json
index 3fba478d5..17b45d57e 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,33 +1,32 @@
 {
-  "name": "@xenova/transformers",
-  "version": "2.17.2",
+  "name": "@huggingface/transformers",
+  "version": "3.0.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
-      "name": "@xenova/transformers",
-      "version": "2.17.2",
+      "name": "@huggingface/transformers",
+      "version": "3.0.0",
       "license": "Apache-2.0",
       "dependencies": {
-        "@huggingface/jinja": "^0.2.2",
-        "onnxruntime-web": "1.14.0",
-        "sharp": "^0.32.0"
+        "@huggingface/jinja": "^0.3.0",
+        "onnxruntime-node": "1.19.2",
+        "onnxruntime-web": "1.20.0-dev.20241016-2b8fc5529b",
+        "sharp": "^0.33.5"
       },
       "devDependencies": {
         "@types/jest": "^29.5.1",
+        "@webgpu/types": "^0.1.44",
         "catharsis": "github:xenova/catharsis",
-        "copy-webpack-plugin": "^11.0.0",
         "jest": "^29.5.0",
         "jest-environment-node": "^29.5.0",
         "jsdoc-to-markdown": "^8.0.1",
+        "prettier": "3.3.3",
         "typescript": "^5.2.2",
         "wavefile": "^11.0.0",
         "webpack": "^5.80.0",
         "webpack-cli": "^5.0.2",
         "webpack-dev-server": "^4.13.3"
-      },
-      "optionalDependencies": {
-        "onnxruntime-node": "1.14.0"
       }
     },
     "node_modules/@ampproject/remapping": {
@@ -744,14 +743,465 @@
         "node": ">=10.0.0"
       }
     },
+    "node_modules/@emnapi/runtime": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.2.0.tgz",
+      "integrity": "sha512-bV21/9LQmcQeCPEg3BDFtvwL6cwiTMksYNWQQ4KOxCZikEGalWtenoZ0wCiukJINlGCIi2KXx01g4FoH/LxpzQ==",
+      "optional": true,
+      "dependencies": {
+        "tslib": "^2.4.0"
+      }
+    },
     "node_modules/@huggingface/jinja": {
-      "version": "0.2.2",
-      "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.2.2.tgz",
-      "integrity": "sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA==",
+      "version": "0.3.0",
+      "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.3.0.tgz",
+      "integrity": "sha512-GLJzso0M07ZncFkrJMIXVU4os6GFbPocD4g8fMQPMGJubf48FtGOsUORH2rtFdXPIPelz8SLBMn8ZRmOTwXm9Q==",
       "engines": {
         "node": ">=18"
       }
     },
+    "node_modules/@img/sharp-darwin-arm64": {
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.33.5.tgz",
+      "integrity": "sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-darwin-arm64": "1.0.4"
+      }
+    },
+    "node_modules/@img/sharp-darwin-x64": {
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.33.5.tgz",
+      "integrity": "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-darwin-x64": "1.0.4"
+      }
+    },
+    "node_modules/@img/sharp-libvips-darwin-arm64": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.0.4.tgz",
+      "integrity": "sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-darwin-x64": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.0.4.tgz",
+      "integrity": "sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "darwin"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-arm": {
+      "version": "1.0.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.0.5.tgz",
+      "integrity": "sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==",
+      "cpu": [
+        "arm"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-arm64": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.0.4.tgz",
+      "integrity": "sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-s390x": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.0.4.tgz",
+      "integrity": "sha512-u7Wz6ntiSSgGSGcjZ55im6uvTrOxSIS8/dgoVMoiGE9I6JAfU50yH5BoDlYA1tcuGS7g/QNtetJnxA6QEsCVTA==",
+      "cpu": [
+        "s390x"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linux-x64": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.0.4.tgz",
+      "integrity": "sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linuxmusl-arm64": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.0.4.tgz",
+      "integrity": "sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-libvips-linuxmusl-x64": {
+      "version": "1.0.4",
+      "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.0.4.tgz",
+      "integrity": "sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-linux-arm": {
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.33.5.tgz",
+      "integrity": "sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==",
+      "cpu": [
+        "arm"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-arm": "1.0.5"
+      }
+    },
+    "node_modules/@img/sharp-linux-arm64": {
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.33.5.tgz",
+      "integrity": "sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-arm64": "1.0.4"
+      }
+    },
+    "node_modules/@img/sharp-linux-s390x": {
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.33.5.tgz",
+      "integrity": "sha512-y/5PCd+mP4CA/sPDKl2961b+C9d+vPAveS33s6Z3zfASk2j5upL6fXVPZi7ztePZ5CuH+1kW8JtvxgbuXHRa4Q==",
+      "cpu": [
+        "s390x"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-s390x": "1.0.4"
+      }
+    },
+    "node_modules/@img/sharp-linux-x64": {
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.33.5.tgz",
+      "integrity": "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linux-x64": "1.0.4"
+      }
+    },
+    "node_modules/@img/sharp-linuxmusl-arm64": {
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.33.5.tgz",
+      "integrity": "sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g==",
+      "cpu": [
+        "arm64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linuxmusl-arm64": "1.0.4"
+      }
+    },
+    "node_modules/@img/sharp-linuxmusl-x64": {
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.33.5.tgz",
+      "integrity": "sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "linux"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      },
+      "optionalDependencies": {
+        "@img/sharp-libvips-linuxmusl-x64": "1.0.4"
+      }
+    },
+    "node_modules/@img/sharp-wasm32": {
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.33.5.tgz",
+      "integrity": "sha512-ykUW4LVGaMcU9lu9thv85CbRMAwfeadCJHRsg2GmeRa/cJxsVY9Rbd57JcMxBkKHag5U/x7TSBpScF4U8ElVzg==",
+      "cpu": [
+        "wasm32"
+      ],
+      "optional": true,
+      "dependencies": {
+        "@emnapi/runtime": "^1.2.0"
+      },
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-win32-ia32": {
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.33.5.tgz",
+      "integrity": "sha512-T36PblLaTwuVJ/zw/LaH0PdZkRz5rd3SmMHX8GSmR7vtNSP5Z6bQkExdSK7xGWyxLw4sUknBuugTelgw2faBbQ==",
+      "cpu": [
+        "ia32"
+      ],
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@img/sharp-win32-x64": {
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.33.5.tgz",
+      "integrity": "sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==",
+      "cpu": [
+        "x64"
+      ],
+      "optional": true,
+      "os": [
+        "win32"
+      ],
+      "engines": {
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+      },
+      "funding": {
+        "url": "https://opencollective.com/libvips"
+      }
+    },
+    "node_modules/@isaacs/cliui": {
+      "version": "8.0.2",
+      "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz",
+      "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==",
+      "dependencies": {
+        "string-width": "^5.1.2",
+        "string-width-cjs": "npm:string-width@^4.2.0",
+        "strip-ansi": "^7.0.1",
+        "strip-ansi-cjs": "npm:strip-ansi@^6.0.1",
+        "wrap-ansi": "^8.1.0",
+        "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0"
+      },
+      "engines": {
+        "node": ">=12"
+      }
+    },
+    "node_modules/@isaacs/cliui/node_modules/ansi-regex": {
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz",
+      "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==",
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/ansi-regex?sponsor=1"
+      }
+    },
+    "node_modules/@isaacs/cliui/node_modules/ansi-styles": {
+      "version": "6.2.1",
+      "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz",
+      "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==",
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/ansi-styles?sponsor=1"
+      }
+    },
+    "node_modules/@isaacs/cliui/node_modules/emoji-regex": {
+      "version": "9.2.2",
+      "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz",
+      "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg=="
+    },
+    "node_modules/@isaacs/cliui/node_modules/string-width": {
+      "version": "5.1.2",
+      "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz",
+      "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==",
+      "dependencies": {
+        "eastasianwidth": "^0.2.0",
+        "emoji-regex": "^9.2.2",
+        "strip-ansi": "^7.0.1"
+      },
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
+    },
+    "node_modules/@isaacs/cliui/node_modules/strip-ansi": {
+      "version": "7.1.0",
+      "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz",
+      "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==",
+      "dependencies": {
+        "ansi-regex": "^6.0.1"
+      },
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/strip-ansi?sponsor=1"
+      }
+    },
+    "node_modules/@isaacs/cliui/node_modules/wrap-ansi": {
+      "version": "8.1.0",
+      "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz",
+      "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==",
+      "dependencies": {
+        "ansi-styles": "^6.1.0",
+        "string-width": "^5.0.1",
+        "strip-ansi": "^7.0.1"
+      },
+      "engines": {
+        "node": ">=12"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
+      }
+    },
+    "node_modules/@isaacs/fs-minipass": {
+      "version": "4.0.1",
+      "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz",
+      "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==",
+      "dependencies": {
+        "minipass": "^7.0.4"
+      },
+      "engines": {
+        "node": ">=18.0.0"
+      }
+    },
     "node_modules/@istanbuljs/load-nyc-config": {
       "version": "1.1.0",
       "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz",
@@ -1116,14 +1566,14 @@
       }
     },
     "node_modules/@jridgewell/gen-mapping": {
-      "version": "0.3.3",
-      "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.3.tgz",
-      "integrity": "sha512-HLhSWOLRi875zjjMG/r+Nv0oCW8umGb0BgEhyX3dDX3egwZtB8PqLnjz3yedt8R5StBrzcg4aBpnh8UA9D1BoQ==",
+      "version": "0.3.5",
+      "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz",
+      "integrity": "sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==",
       "dev": true,
       "dependencies": {
-        "@jridgewell/set-array": "^1.0.1",
+        "@jridgewell/set-array": "^1.2.1",
         "@jridgewell/sourcemap-codec": "^1.4.10",
-        "@jridgewell/trace-mapping": "^0.3.9"
+        "@jridgewell/trace-mapping": "^0.3.24"
       },
       "engines": {
         "node": ">=6.0.0"
@@ -1139,22 +1589,22 @@
       }
     },
     "node_modules/@jridgewell/set-array": {
-      "version": "1.1.2",
-      "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.1.2.tgz",
-      "integrity": "sha512-xnkseuNADM0gt2bs+BvhO0p78Mk762YnZdsuzFV018NoG1Sj1SCQvpSqa7XUaTam5vAGasABV9qXASMKnFMwMw==",
+      "version": "1.2.1",
+      "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz",
+      "integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==",
       "dev": true,
       "engines": {
         "node": ">=6.0.0"
       }
     },
     "node_modules/@jridgewell/source-map": {
-      "version": "0.3.3",
-      "resolved": "https://registry.npmjs.org/@jridgewell/source-map/-/source-map-0.3.3.tgz",
-      "integrity": "sha512-b+fsZXeLYi9fEULmfBrhxn4IrPlINf8fiNarzTof004v3lFdntdwa9PF7vFJqm3mg7s+ScJMxXaE3Acp1irZcg==",
+      "version": "0.3.6",
+      "resolved": "https://registry.npmjs.org/@jridgewell/source-map/-/source-map-0.3.6.tgz",
+      "integrity": "sha512-1ZJTZebgqllO79ue2bm3rIGud/bOe0pP5BjSRCRxxYkEZS8STV7zN84UBbiYu7jy+eCKSnVIUgoWWE/tt+shMQ==",
       "dev": true,
       "dependencies": {
-        "@jridgewell/gen-mapping": "^0.3.0",
-        "@jridgewell/trace-mapping": "^0.3.9"
+        "@jridgewell/gen-mapping": "^0.3.5",
+        "@jridgewell/trace-mapping": "^0.3.25"
       }
     },
     "node_modules/@jridgewell/sourcemap-codec": {
@@ -1164,21 +1614,15 @@
       "dev": true
     },
     "node_modules/@jridgewell/trace-mapping": {
-      "version": "0.3.18",
-      "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.18.tgz",
-      "integrity": "sha512-w+niJYzMHdd7USdiH2U6869nqhD2nbfZXND5Yp93qIbEmnDNk7PD48o+YchRVpzMU7M6jVCbenTR7PA1FLQ9pA==",
+      "version": "0.3.25",
+      "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz",
+      "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==",
       "dev": true,
       "dependencies": {
-        "@jridgewell/resolve-uri": "3.1.0",
-        "@jridgewell/sourcemap-codec": "1.4.14"
+        "@jridgewell/resolve-uri": "^3.1.0",
+        "@jridgewell/sourcemap-codec": "^1.4.14"
       }
     },
-    "node_modules/@jridgewell/trace-mapping/node_modules/@jridgewell/sourcemap-codec": {
-      "version": "1.4.14",
-      "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.14.tgz",
-      "integrity": "sha512-XPSJHWmi394fuUuzDnGz1wiKqWfo1yXecHQMRf2l6hztTO+nPru658AyDngaBe7isIxEkRsPR3FZh+s7iVa4Uw==",
-      "dev": true
-    },
     "node_modules/@jsdoc/salty": {
       "version": "0.2.5",
       "resolved": "https://registry.npmjs.org/@jsdoc/salty/-/salty-0.2.5.tgz",
@@ -1206,39 +1650,13 @@
         "semver": "bin/semver.js"
       }
     },
-    "node_modules/@nodelib/fs.scandir": {
-      "version": "2.1.5",
-      "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz",
-      "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==",
-      "dev": true,
-      "dependencies": {
-        "@nodelib/fs.stat": "2.0.5",
-        "run-parallel": "^1.1.9"
-      },
-      "engines": {
-        "node": ">= 8"
-      }
-    },
-    "node_modules/@nodelib/fs.stat": {
-      "version": "2.0.5",
-      "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz",
-      "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==",
-      "dev": true,
-      "engines": {
-        "node": ">= 8"
-      }
-    },
-    "node_modules/@nodelib/fs.walk": {
-      "version": "1.2.8",
-      "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz",
-      "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==",
-      "dev": true,
-      "dependencies": {
-        "@nodelib/fs.scandir": "2.1.5",
-        "fastq": "^1.6.0"
-      },
+    "node_modules/@pkgjs/parseargs": {
+      "version": "0.11.0",
+      "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz",
+      "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==",
+      "optional": true,
       "engines": {
-        "node": ">= 8"
+        "node": ">=14"
       }
     },
     "node_modules/@protobufjs/aspromise": {
@@ -1398,30 +1816,10 @@
         "@types/node": "*"
       }
     },
-    "node_modules/@types/eslint": {
-      "version": "8.37.0",
-      "resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-8.37.0.tgz",
-      "integrity": "sha512-Piet7dG2JBuDIfohBngQ3rCt7MgO9xCO4xIMKxBThCq5PNRB91IjlJ10eJVwfoNtvTErmxLzwBZ7rHZtbOMmFQ==",
-      "dev": true,
-      "dependencies": {
-        "@types/estree": "*",
-        "@types/json-schema": "*"
-      }
-    },
-    "node_modules/@types/eslint-scope": {
-      "version": "3.7.4",
-      "resolved": "https://registry.npmjs.org/@types/eslint-scope/-/eslint-scope-3.7.4.tgz",
-      "integrity": "sha512-9K4zoImiZc3HlIp6AVUDE4CWYx22a+lhSZMYNpbjW04+YF0KWj4pJXnEMjdnFTiQibFFmElcsasJXDbdI/EPhA==",
-      "dev": true,
-      "dependencies": {
-        "@types/eslint": "*",
-        "@types/estree": "*"
-      }
-    },
     "node_modules/@types/estree": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.1.tgz",
-      "integrity": "sha512-LG4opVs2ANWZ1TJoKc937iMmNstM/d0ae1vNbnBvBhqCSezgVUOzcLCqbI5elV8Vy6WKwKjaqR+zO9VKirBBCA==",
+      "version": "1.0.5",
+      "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.5.tgz",
+      "integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==",
       "dev": true
     },
     "node_modules/@types/express": {
@@ -1621,151 +2019,157 @@
       "dev": true
     },
     "node_modules/@webassemblyjs/ast": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.11.5.tgz",
-      "integrity": "sha512-LHY/GSAZZRpsNQH+/oHqhRQ5FT7eoULcBqgfyTB5nQHogFnK3/7QoN7dLnwSE/JkUAF0SrRuclT7ODqMFtWxxQ==",
+      "version": "1.12.1",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.12.1.tgz",
+      "integrity": "sha512-EKfMUOPRRUTy5UII4qJDGPpqfwjOmZ5jeGFwid9mnoqIFK+e0vqoi1qH56JpmZSzEL53jKnNzScdmftJyG5xWg==",
       "dev": true,
       "dependencies": {
-        "@webassemblyjs/helper-numbers": "1.11.5",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.5"
+        "@webassemblyjs/helper-numbers": "1.11.6",
+        "@webassemblyjs/helper-wasm-bytecode": "1.11.6"
       }
     },
     "node_modules/@webassemblyjs/floating-point-hex-parser": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.11.5.tgz",
-      "integrity": "sha512-1j1zTIC5EZOtCplMBG/IEwLtUojtwFVwdyVMbL/hwWqbzlQoJsWCOavrdnLkemwNoC/EOwtUFch3fuo+cbcXYQ==",
+      "version": "1.11.6",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.11.6.tgz",
+      "integrity": "sha512-ejAj9hfRJ2XMsNHk/v6Fu2dGS+i4UaXBXGemOfQ/JfQ6mdQg/WXtwleQRLLS4OvfDhv8rYnVwH27YJLMyYsxhw==",
       "dev": true
     },
     "node_modules/@webassemblyjs/helper-api-error": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.11.5.tgz",
-      "integrity": "sha512-L65bDPmfpY0+yFrsgz8b6LhXmbbs38OnwDCf6NpnMUYqa+ENfE5Dq9E42ny0qz/PdR0LJyq/T5YijPnU8AXEpA==",
+      "version": "1.11.6",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.11.6.tgz",
+      "integrity": "sha512-o0YkoP4pVu4rN8aTJgAyj9hC2Sv5UlkzCHhxqWj8butaLvnpdc2jOwh4ewE6CX0txSfLn/UYaV/pheS2Txg//Q==",
       "dev": true
     },
     "node_modules/@webassemblyjs/helper-buffer": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.11.5.tgz",
-      "integrity": "sha512-fDKo1gstwFFSfacIeH5KfwzjykIE6ldh1iH9Y/8YkAZrhmu4TctqYjSh7t0K2VyDSXOZJ1MLhht/k9IvYGcIxg==",
+      "version": "1.12.1",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.12.1.tgz",
+      "integrity": "sha512-nzJwQw99DNDKr9BVCOZcLuJJUlqkJh+kVzVl6Fmq/tI5ZtEyWT1KZMyOXltXLZJmDtvLCDgwsyrkohEtopTXCw==",
       "dev": true
     },
     "node_modules/@webassemblyjs/helper-numbers": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-numbers/-/helper-numbers-1.11.5.tgz",
-      "integrity": "sha512-DhykHXM0ZABqfIGYNv93A5KKDw/+ywBFnuWybZZWcuzWHfbp21wUfRkbtz7dMGwGgT4iXjWuhRMA2Mzod6W4WA==",
+      "version": "1.11.6",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-numbers/-/helper-numbers-1.11.6.tgz",
+      "integrity": "sha512-vUIhZ8LZoIWHBohiEObxVm6hwP034jwmc9kuq5GdHZH0wiLVLIPcMCdpJzG4C11cHoQ25TFIQj9kaVADVX7N3g==",
       "dev": true,
       "dependencies": {
-        "@webassemblyjs/floating-point-hex-parser": "1.11.5",
-        "@webassemblyjs/helper-api-error": "1.11.5",
+        "@webassemblyjs/floating-point-hex-parser": "1.11.6",
+        "@webassemblyjs/helper-api-error": "1.11.6",
         "@xtuc/long": "4.2.2"
       }
     },
     "node_modules/@webassemblyjs/helper-wasm-bytecode": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.11.5.tgz",
-      "integrity": "sha512-oC4Qa0bNcqnjAowFn7MPCETQgDYytpsfvz4ujZz63Zu/a/v71HeCAAmZsgZ3YVKec3zSPYytG3/PrRCqbtcAvA==",
+      "version": "1.11.6",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.11.6.tgz",
+      "integrity": "sha512-sFFHKwcmBprO9e7Icf0+gddyWYDViL8bpPjJJl0WHxCdETktXdmtWLGVzoHbqUcY4Be1LkNfwTmXOJUFZYSJdA==",
       "dev": true
     },
     "node_modules/@webassemblyjs/helper-wasm-section": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.11.5.tgz",
-      "integrity": "sha512-uEoThA1LN2NA+K3B9wDo3yKlBfVtC6rh0i4/6hvbz071E8gTNZD/pT0MsBf7MeD6KbApMSkaAK0XeKyOZC7CIA==",
+      "version": "1.12.1",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.12.1.tgz",
+      "integrity": "sha512-Jif4vfB6FJlUlSbgEMHUyk1j234GTNG9dBJ4XJdOySoj518Xj0oGsNi59cUQF4RRMS9ouBUxDDdyBVfPTypa5g==",
       "dev": true,
       "dependencies": {
-        "@webassemblyjs/ast": "1.11.5",
-        "@webassemblyjs/helper-buffer": "1.11.5",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.5",
-        "@webassemblyjs/wasm-gen": "1.11.5"
+        "@webassemblyjs/ast": "1.12.1",
+        "@webassemblyjs/helper-buffer": "1.12.1",
+        "@webassemblyjs/helper-wasm-bytecode": "1.11.6",
+        "@webassemblyjs/wasm-gen": "1.12.1"
       }
     },
     "node_modules/@webassemblyjs/ieee754": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.11.5.tgz",
-      "integrity": "sha512-37aGq6qVL8A8oPbPrSGMBcp38YZFXcHfiROflJn9jxSdSMMM5dS5P/9e2/TpaJuhE+wFrbukN2WI6Hw9MH5acg==",
+      "version": "1.11.6",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.11.6.tgz",
+      "integrity": "sha512-LM4p2csPNvbij6U1f19v6WR56QZ8JcHg3QIJTlSwzFcmx6WSORicYj6I63f9yU1kEUtrpG+kjkiIAkevHpDXrg==",
       "dev": true,
       "dependencies": {
         "@xtuc/ieee754": "^1.2.0"
       }
     },
     "node_modules/@webassemblyjs/leb128": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.11.5.tgz",
-      "integrity": "sha512-ajqrRSXaTJoPW+xmkfYN6l8VIeNnR4vBOTQO9HzR7IygoCcKWkICbKFbVTNMjMgMREqXEr0+2M6zukzM47ZUfQ==",
+      "version": "1.11.6",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.11.6.tgz",
+      "integrity": "sha512-m7a0FhE67DQXgouf1tbN5XQcdWoNgaAuoULHIfGFIEVKA6tu/edls6XnIlkmS6FrXAquJRPni3ZZKjw6FSPjPQ==",
       "dev": true,
       "dependencies": {
         "@xtuc/long": "4.2.2"
       }
     },
     "node_modules/@webassemblyjs/utf8": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.11.5.tgz",
-      "integrity": "sha512-WiOhulHKTZU5UPlRl53gHR8OxdGsSOxqfpqWeA2FmcwBMaoEdz6b2x2si3IwC9/fSPLfe8pBMRTHVMk5nlwnFQ==",
+      "version": "1.11.6",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.11.6.tgz",
+      "integrity": "sha512-vtXf2wTQ3+up9Zsg8sa2yWiQpzSsMyXj0qViVP6xKGCUT8p8YJ6HqI7l5eCnWx1T/FYdsv07HQs2wTFbbof/RA==",
       "dev": true
     },
     "node_modules/@webassemblyjs/wasm-edit": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.11.5.tgz",
-      "integrity": "sha512-C0p9D2fAu3Twwqvygvf42iGCQ4av8MFBLiTb+08SZ4cEdwzWx9QeAHDo1E2k+9s/0w1DM40oflJOpkZ8jW4HCQ==",
+      "version": "1.12.1",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.12.1.tgz",
+      "integrity": "sha512-1DuwbVvADvS5mGnXbE+c9NfA8QRcZ6iKquqjjmR10k6o+zzsRVesil54DKexiowcFCPdr/Q0qaMgB01+SQ1u6g==",
       "dev": true,
       "dependencies": {
-        "@webassemblyjs/ast": "1.11.5",
-        "@webassemblyjs/helper-buffer": "1.11.5",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.5",
-        "@webassemblyjs/helper-wasm-section": "1.11.5",
-        "@webassemblyjs/wasm-gen": "1.11.5",
-        "@webassemblyjs/wasm-opt": "1.11.5",
-        "@webassemblyjs/wasm-parser": "1.11.5",
-        "@webassemblyjs/wast-printer": "1.11.5"
+        "@webassemblyjs/ast": "1.12.1",
+        "@webassemblyjs/helper-buffer": "1.12.1",
+        "@webassemblyjs/helper-wasm-bytecode": "1.11.6",
+        "@webassemblyjs/helper-wasm-section": "1.12.1",
+        "@webassemblyjs/wasm-gen": "1.12.1",
+        "@webassemblyjs/wasm-opt": "1.12.1",
+        "@webassemblyjs/wasm-parser": "1.12.1",
+        "@webassemblyjs/wast-printer": "1.12.1"
       }
     },
     "node_modules/@webassemblyjs/wasm-gen": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.11.5.tgz",
-      "integrity": "sha512-14vteRlRjxLK9eSyYFvw1K8Vv+iPdZU0Aebk3j6oB8TQiQYuO6hj9s4d7qf6f2HJr2khzvNldAFG13CgdkAIfA==",
+      "version": "1.12.1",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.12.1.tgz",
+      "integrity": "sha512-TDq4Ojh9fcohAw6OIMXqiIcTq5KUXTGRkVxbSo1hQnSy6lAM5GSdfwWeSxpAo0YzgsgF182E/U0mDNhuA0tW7w==",
       "dev": true,
       "dependencies": {
-        "@webassemblyjs/ast": "1.11.5",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.5",
-        "@webassemblyjs/ieee754": "1.11.5",
-        "@webassemblyjs/leb128": "1.11.5",
-        "@webassemblyjs/utf8": "1.11.5"
+        "@webassemblyjs/ast": "1.12.1",
+        "@webassemblyjs/helper-wasm-bytecode": "1.11.6",
+        "@webassemblyjs/ieee754": "1.11.6",
+        "@webassemblyjs/leb128": "1.11.6",
+        "@webassemblyjs/utf8": "1.11.6"
       }
     },
     "node_modules/@webassemblyjs/wasm-opt": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.11.5.tgz",
-      "integrity": "sha512-tcKwlIXstBQgbKy1MlbDMlXaxpucn42eb17H29rawYLxm5+MsEmgPzeCP8B1Cl69hCice8LeKgZpRUAPtqYPgw==",
+      "version": "1.12.1",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.12.1.tgz",
+      "integrity": "sha512-Jg99j/2gG2iaz3hijw857AVYekZe2SAskcqlWIZXjji5WStnOpVoat3gQfT/Q5tb2djnCjBtMocY/Su1GfxPBg==",
       "dev": true,
       "dependencies": {
-        "@webassemblyjs/ast": "1.11.5",
-        "@webassemblyjs/helper-buffer": "1.11.5",
-        "@webassemblyjs/wasm-gen": "1.11.5",
-        "@webassemblyjs/wasm-parser": "1.11.5"
+        "@webassemblyjs/ast": "1.12.1",
+        "@webassemblyjs/helper-buffer": "1.12.1",
+        "@webassemblyjs/wasm-gen": "1.12.1",
+        "@webassemblyjs/wasm-parser": "1.12.1"
       }
     },
     "node_modules/@webassemblyjs/wasm-parser": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.11.5.tgz",
-      "integrity": "sha512-SVXUIwsLQlc8srSD7jejsfTU83g7pIGr2YYNb9oHdtldSxaOhvA5xwvIiWIfcX8PlSakgqMXsLpLfbbJ4cBYew==",
+      "version": "1.12.1",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.12.1.tgz",
+      "integrity": "sha512-xikIi7c2FHXysxXe3COrVUPSheuBtpcfhbpFj4gmu7KRLYOzANztwUU0IbsqvMqzuNK2+glRGWCEqZo1WCLyAQ==",
       "dev": true,
       "dependencies": {
-        "@webassemblyjs/ast": "1.11.5",
-        "@webassemblyjs/helper-api-error": "1.11.5",
-        "@webassemblyjs/helper-wasm-bytecode": "1.11.5",
-        "@webassemblyjs/ieee754": "1.11.5",
-        "@webassemblyjs/leb128": "1.11.5",
-        "@webassemblyjs/utf8": "1.11.5"
+        "@webassemblyjs/ast": "1.12.1",
+        "@webassemblyjs/helper-api-error": "1.11.6",
+        "@webassemblyjs/helper-wasm-bytecode": "1.11.6",
+        "@webassemblyjs/ieee754": "1.11.6",
+        "@webassemblyjs/leb128": "1.11.6",
+        "@webassemblyjs/utf8": "1.11.6"
       }
     },
     "node_modules/@webassemblyjs/wast-printer": {
-      "version": "1.11.5",
-      "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.11.5.tgz",
-      "integrity": "sha512-f7Pq3wvg3GSPUPzR0F6bmI89Hdb+u9WXrSKc4v+N0aV0q6r42WoF92Jp2jEorBEBRoRNXgjp53nBniDXcqZYPA==",
+      "version": "1.12.1",
+      "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.12.1.tgz",
+      "integrity": "sha512-+X4WAlOisVWQMikjbcvY2e0rwPsKQ9F688lksZhBcPycBBuii3O7m8FACbDMWDojpAqvjIncrG8J0XHKyQfVeA==",
       "dev": true,
       "dependencies": {
-        "@webassemblyjs/ast": "1.11.5",
+        "@webassemblyjs/ast": "1.12.1",
         "@xtuc/long": "4.2.2"
       }
     },
+    "node_modules/@webgpu/types": {
+      "version": "0.1.44",
+      "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.44.tgz",
+      "integrity": "sha512-JDpYJN5E/asw84LTYhKyvPpxGnD+bAKPtpW9Ilurf7cZpxaTbxkQcGwOd7jgB9BPBrTYQ+32ufo4HiuomTjHNQ==",
+      "dev": true
+    },
     "node_modules/@webpack-cli/configtest": {
       "version": "2.0.1",
       "resolved": "https://registry.npmjs.org/@webpack-cli/configtest/-/configtest-2.0.1.tgz",
@@ -1836,9 +2240,9 @@
       }
     },
     "node_modules/acorn": {
-      "version": "8.8.2",
-      "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.8.2.tgz",
-      "integrity": "sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw==",
+      "version": "8.12.1",
+      "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.12.1.tgz",
+      "integrity": "sha512-tcpGyI9zbizT9JbV6oYE477V6mTlXvvi0T0G3SNIYE2apm/G5huBa1+K89VGeovbg+jycCrfhl3ADxErOuO6Jg==",
       "dev": true,
       "bin": {
         "acorn": "bin/acorn"
@@ -1847,10 +2251,10 @@
         "node": ">=0.4.0"
       }
     },
-    "node_modules/acorn-import-assertions": {
-      "version": "1.8.0",
-      "resolved": "https://registry.npmjs.org/acorn-import-assertions/-/acorn-import-assertions-1.8.0.tgz",
-      "integrity": "sha512-m7VZ3jwz4eK6A4Vtt8Ew1/mNbP24u0FhdyfA7fSvnJR6LMdfOYnmuIrrJAgrYfYJ10F/otaHTtrtrtmHdMNzEw==",
+    "node_modules/acorn-import-attributes": {
+      "version": "1.9.5",
+      "resolved": "https://registry.npmjs.org/acorn-import-attributes/-/acorn-import-attributes-1.9.5.tgz",
+      "integrity": "sha512-n02Vykv5uA3eHGM/Z2dQrcD56kL8TyDb2p1+0P83PClMnC/nc+anbQRhIOWnSq4Ke/KvDPrY3C9hDtC/A3eHnQ==",
       "dev": true,
       "peerDependencies": {
         "acorn": "^8"
@@ -1972,7 +2376,6 @@
       "version": "5.0.1",
       "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz",
       "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==",
-      "dev": true,
       "engines": {
         "node": ">=8"
       }
@@ -1981,7 +2384,6 @@
       "version": "4.3.0",
       "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz",
       "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==",
-      "dev": true,
       "dependencies": {
         "color-convert": "^2.0.1"
       },
@@ -2026,11 +2428,6 @@
       "integrity": "sha512-hNfzcOV8W4NdualtqBFPyVO+54DSJuZGY9qT4pRroB6S9e3iiido2ISIC5h9R2sPJ8H3FHCIiEnsv1lPXO3KtQ==",
       "dev": true
     },
-    "node_modules/b4a": {
-      "version": "1.6.4",
-      "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.4.tgz",
-      "integrity": "sha512-fpWrvyVHEKyeEvbKZTVOeZF3VSKKWtJxFIxX/jaVPf+cLbGUSitjb49pHLqPV2BUNNZ0LcoeEGfE/YCpyDYHIw=="
-    },
     "node_modules/babel-jest": {
       "version": "29.6.1",
       "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-29.6.1.tgz",
@@ -2134,27 +2531,7 @@
     "node_modules/balanced-match": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
-      "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
-      "dev": true
-    },
-    "node_modules/base64-js": {
-      "version": "1.5.1",
-      "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
-      "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
+      "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
     },
     "node_modules/batch": {
       "version": "0.6.1",
@@ -2171,16 +2548,6 @@
         "node": ">=8"
       }
     },
-    "node_modules/bl": {
-      "version": "4.1.0",
-      "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz",
-      "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==",
-      "dependencies": {
-        "buffer": "^5.5.0",
-        "inherits": "^2.0.4",
-        "readable-stream": "^3.4.0"
-      }
-    },
     "node_modules/bluebird": {
       "version": "3.7.2",
       "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.7.2.tgz",
@@ -2188,9 +2555,9 @@
       "dev": true
     },
     "node_modules/body-parser": {
-      "version": "1.20.2",
-      "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.2.tgz",
-      "integrity": "sha512-ml9pReCu3M61kGlqoTm2umSXTlRTuGTx0bfYj+uIUKKYycG5NtSbeetV3faSU6R7ajOPw0g/J1PvK4qNy7s5bA==",
+      "version": "1.20.3",
+      "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-1.20.3.tgz",
+      "integrity": "sha512-7rAxByjUMqQ3/bHJy7D6OGXvx/MMc4IqBn/X0fcM1QUcAItpZrBEYhWGem+tzXH90c+G01ypMcYJBO9Y30203g==",
       "dev": true,
       "dependencies": {
         "bytes": "3.1.2",
@@ -2201,7 +2568,7 @@
         "http-errors": "2.0.0",
         "iconv-lite": "0.4.24",
         "on-finished": "2.4.1",
-        "qs": "6.11.0",
+        "qs": "6.13.0",
         "raw-body": "2.5.2",
         "type-is": "~1.6.18",
         "unpipe": "1.0.0"
@@ -2255,9 +2622,9 @@
       }
     },
     "node_modules/browserslist": {
-      "version": "4.21.9",
-      "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.21.9.tgz",
-      "integrity": "sha512-M0MFoZzbUrRU4KNfCrDLnvyE7gub+peetoTid3TBIqtunaDJyXlwhakT+/VkvSXcfIzFfK/nkCs4nmyTmxdNSg==",
+      "version": "4.23.3",
+      "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.23.3.tgz",
+      "integrity": "sha512-btwCFJVjI4YWDNfau8RhZ+B1Q/VLoUITrm3RlP6y1tYGWIOa+InuYiRGXUBXo8nA1qKmHMyLB/iVQg5TT4eFoA==",
       "dev": true,
       "funding": [
         {
@@ -2274,10 +2641,10 @@
         }
       ],
       "dependencies": {
-        "caniuse-lite": "^1.0.30001503",
-        "electron-to-chromium": "^1.4.431",
-        "node-releases": "^2.0.12",
-        "update-browserslist-db": "^1.0.11"
+        "caniuse-lite": "^1.0.30001646",
+        "electron-to-chromium": "^1.5.4",
+        "node-releases": "^2.0.18",
+        "update-browserslist-db": "^1.1.0"
       },
       "bin": {
         "browserslist": "cli.js"
@@ -2295,29 +2662,6 @@
         "node-int64": "^0.4.0"
       }
     },
-    "node_modules/buffer": {
-      "version": "5.7.1",
-      "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
-      "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ],
-      "dependencies": {
-        "base64-js": "^1.3.1",
-        "ieee754": "^1.1.13"
-      }
-    },
     "node_modules/buffer-from": {
       "version": "1.1.2",
       "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz",
@@ -2394,9 +2738,9 @@
       }
     },
     "node_modules/caniuse-lite": {
-      "version": "1.0.30001513",
-      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001513.tgz",
-      "integrity": "sha512-pnjGJo7SOOjAGytZZ203Em95MRM8Cr6jhCXNF/FAXTpCTRTECnqQWLpiTRqrFtdYcth8hf4WECUpkezuYsMVww==",
+      "version": "1.0.30001653",
+      "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001653.tgz",
+      "integrity": "sha512-XGWQVB8wFQ2+9NZwZ10GxTYC5hk0Fa+q8cSkr0tgvMhYhMHP/QC+WTgrePMDBWiWc/pV+1ik82Al20XOK25Gcw==",
       "dev": true,
       "funding": [
         {
@@ -2490,9 +2834,12 @@
       }
     },
     "node_modules/chownr": {
-      "version": "1.1.4",
-      "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
-      "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg=="
+      "version": "3.0.0",
+      "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz",
+      "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==",
+      "engines": {
+        "node": ">=18"
+      }
     },
     "node_modules/chrome-trace-event": {
       "version": "1.0.3",
@@ -2814,119 +3161,30 @@
       "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==",
       "dev": true,
       "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/convert-source-map": {
-      "version": "2.0.0",
-      "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz",
-      "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==",
-      "dev": true
-    },
-    "node_modules/cookie": {
-      "version": "0.6.0",
-      "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz",
-      "integrity": "sha512-U71cyTamuh1CRNCfpGY6to28lxvNwPG4Guz/EVjgf3Jmzv0vlDp1atT9eS5dDjMYHucpHbWns6Lwf3BKz6svdw==",
-      "dev": true,
-      "engines": {
-        "node": ">= 0.6"
-      }
-    },
-    "node_modules/cookie-signature": {
-      "version": "1.0.6",
-      "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz",
-      "integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==",
-      "dev": true
-    },
-    "node_modules/copy-webpack-plugin": {
-      "version": "11.0.0",
-      "resolved": "https://registry.npmjs.org/copy-webpack-plugin/-/copy-webpack-plugin-11.0.0.tgz",
-      "integrity": "sha512-fX2MWpamkW0hZxMEg0+mYnA40LTosOSa5TqZ9GYIBzyJa9C3QUaMPSE2xAi/buNr8u89SfD9wHSQVBzrRa/SOQ==",
-      "dev": true,
-      "dependencies": {
-        "fast-glob": "^3.2.11",
-        "glob-parent": "^6.0.1",
-        "globby": "^13.1.1",
-        "normalize-path": "^3.0.0",
-        "schema-utils": "^4.0.0",
-        "serialize-javascript": "^6.0.0"
-      },
-      "engines": {
-        "node": ">= 14.15.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/webpack"
-      },
-      "peerDependencies": {
-        "webpack": "^5.1.0"
-      }
-    },
-    "node_modules/copy-webpack-plugin/node_modules/ajv": {
-      "version": "8.12.0",
-      "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.12.0.tgz",
-      "integrity": "sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA==",
-      "dev": true,
-      "dependencies": {
-        "fast-deep-equal": "^3.1.1",
-        "json-schema-traverse": "^1.0.0",
-        "require-from-string": "^2.0.2",
-        "uri-js": "^4.2.2"
-      },
-      "funding": {
-        "type": "github",
-        "url": "https://github.com/sponsors/epoberezkin"
-      }
-    },
-    "node_modules/copy-webpack-plugin/node_modules/ajv-keywords": {
-      "version": "5.1.0",
-      "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-5.1.0.tgz",
-      "integrity": "sha512-YCS/JNFAUyr5vAuhk1DWm1CBxRHW9LbJ2ozWeemrIqpbsqKjHVxYPyi5GC0rjZIT5JxJ3virVTS8wk4i/Z+krw==",
-      "dev": true,
-      "dependencies": {
-        "fast-deep-equal": "^3.1.3"
-      },
-      "peerDependencies": {
-        "ajv": "^8.8.2"
-      }
-    },
-    "node_modules/copy-webpack-plugin/node_modules/glob-parent": {
-      "version": "6.0.2",
-      "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz",
-      "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==",
-      "dev": true,
-      "dependencies": {
-        "is-glob": "^4.0.3"
-      },
-      "engines": {
-        "node": ">=10.13.0"
+        "node": ">= 0.6"
       }
     },
-    "node_modules/copy-webpack-plugin/node_modules/json-schema-traverse": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
-      "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
+    "node_modules/convert-source-map": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz",
+      "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==",
       "dev": true
     },
-    "node_modules/copy-webpack-plugin/node_modules/schema-utils": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-4.0.1.tgz",
-      "integrity": "sha512-lELhBAAly9NowEsX0yZBlw9ahZG+sK/1RJ21EpzdYHKEs13Vku3LJ+MIPhh4sMs0oCCeufZQEQbMekiA4vuVIQ==",
+    "node_modules/cookie": {
+      "version": "0.7.1",
+      "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.1.tgz",
+      "integrity": "sha512-6DnInpx7SJ2AK3+CTUE/ZM0vWTUboZCegxhC2xiIydHR9jNuTAASBrfEpHhiGOZw/nX51bHt6YQl8jsGo4y/0w==",
       "dev": true,
-      "dependencies": {
-        "@types/json-schema": "^7.0.9",
-        "ajv": "^8.9.0",
-        "ajv-formats": "^2.1.1",
-        "ajv-keywords": "^5.1.0"
-      },
       "engines": {
-        "node": ">= 12.13.0"
-      },
-      "funding": {
-        "type": "opencollective",
-        "url": "https://opencollective.com/webpack"
+        "node": ">= 0.6"
       }
     },
+    "node_modules/cookie-signature": {
+      "version": "1.0.6",
+      "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.0.6.tgz",
+      "integrity": "sha512-QADzlaHc8icV8I7vbaJXJwod9HWYp8uCqf1xa4OfNu1T7JVxQIrUgOWtHdNDtPiywmFbiS12VjotIXLrKM3orQ==",
+      "dev": true
+    },
     "node_modules/core-util-is": {
       "version": "1.0.3",
       "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.3.tgz",
@@ -2937,7 +3195,6 @@
       "version": "7.0.3",
       "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
       "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
-      "dev": true,
       "dependencies": {
         "path-key": "^3.1.0",
         "shebang-command": "^2.0.0",
@@ -2956,20 +3213,6 @@
         "ms": "2.0.0"
       }
     },
-    "node_modules/decompress-response": {
-      "version": "6.0.0",
-      "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz",
-      "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==",
-      "dependencies": {
-        "mimic-response": "^3.1.0"
-      },
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/dedent": {
       "version": "0.7.0",
       "resolved": "https://registry.npmjs.org/dedent/-/dedent-0.7.0.tgz",
@@ -2980,6 +3223,7 @@
       "version": "0.6.0",
       "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz",
       "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==",
+      "dev": true,
       "engines": {
         "node": ">=4.0.0"
       }
@@ -3051,9 +3295,9 @@
       }
     },
     "node_modules/detect-libc": {
-      "version": "2.0.2",
-      "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.2.tgz",
-      "integrity": "sha512-UX6sGumvvqSaXgdKGUsgZWqcUyIXZ/vZTrlRT/iobiKhGL0zL4d3osHj3uqllWJK+i+sixDS/3COVEOFbupFyw==",
+      "version": "2.0.3",
+      "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.0.3.tgz",
+      "integrity": "sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==",
       "engines": {
         "node": ">=8"
       }
@@ -3082,18 +3326,6 @@
         "node": "^14.15.0 || ^16.10.0 || >=18.0.0"
       }
     },
-    "node_modules/dir-glob": {
-      "version": "3.0.1",
-      "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz",
-      "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==",
-      "dev": true,
-      "dependencies": {
-        "path-type": "^4.0.0"
-      },
-      "engines": {
-        "node": ">=8"
-      }
-    },
     "node_modules/dmd": {
       "version": "6.2.0",
       "resolved": "https://registry.npmjs.org/dmd/-/dmd-6.2.0.tgz",
@@ -3135,6 +3367,11 @@
         "node": ">=6"
       }
     },
+    "node_modules/eastasianwidth": {
+      "version": "0.2.0",
+      "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz",
+      "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA=="
+    },
     "node_modules/ee-first": {
       "version": "1.1.1",
       "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
@@ -3142,9 +3379,9 @@
       "dev": true
     },
     "node_modules/electron-to-chromium": {
-      "version": "1.4.454",
-      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.454.tgz",
-      "integrity": "sha512-pmf1rbAStw8UEQ0sr2cdJtWl48ZMuPD9Sto8HVQOq9vx9j2WgDEN6lYoaqFvqEHYOmGA9oRGn7LqWI9ta0YugQ==",
+      "version": "1.5.13",
+      "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.13.tgz",
+      "integrity": "sha512-lbBcvtIJ4J6sS4tb5TLp1b4LyfCdMkwStzXPyAgVgTRAsep4bvrAGaBOP7ZJtQMNJpSQ9SqG4brWOroNaQtm7Q==",
       "dev": true
     },
     "node_modules/emittery": {
@@ -3162,30 +3399,21 @@
     "node_modules/emoji-regex": {
       "version": "8.0.0",
       "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz",
-      "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==",
-      "dev": true
+      "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="
     },
     "node_modules/encodeurl": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
-      "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==",
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz",
+      "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==",
       "dev": true,
       "engines": {
         "node": ">= 0.8"
       }
     },
-    "node_modules/end-of-stream": {
-      "version": "1.4.4",
-      "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz",
-      "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==",
-      "dependencies": {
-        "once": "^1.4.0"
-      }
-    },
     "node_modules/enhanced-resolve": {
-      "version": "5.13.0",
-      "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.13.0.tgz",
-      "integrity": "sha512-eyV8f0y1+bzyfh8xAwW/WTSZpLbjhqc4ne9eGSH4Zo2ejdyiNG9pU6mf9DG8a7+Auk6MFTlNOT4Y2y/9k8GKVg==",
+      "version": "5.17.1",
+      "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.17.1.tgz",
+      "integrity": "sha512-LMHl3dXhTcfv8gM4kEzIUeTQ+7fpdA0l2tUf34BddXPkz2A5xJ5L/Pchd5BL6rdccM9QGvu0sWZzK1Z1t4wwyg==",
       "dev": true,
       "dependencies": {
         "graceful-fs": "^4.2.4",
@@ -3259,9 +3487,9 @@
       "dev": true
     },
     "node_modules/escalade": {
-      "version": "3.1.1",
-      "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz",
-      "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==",
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz",
+      "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==",
       "dev": true,
       "engines": {
         "node": ">=6"
@@ -3394,14 +3622,6 @@
         "node": ">= 0.8.0"
       }
     },
-    "node_modules/expand-template": {
-      "version": "2.0.3",
-      "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz",
-      "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==",
-      "engines": {
-        "node": ">=6"
-      }
-    },
     "node_modules/expect": {
       "version": "29.6.1",
       "resolved": "https://registry.npmjs.org/expect/-/expect-29.6.1.tgz",
@@ -3420,37 +3640,37 @@
       }
     },
     "node_modules/express": {
-      "version": "4.19.2",
-      "resolved": "https://registry.npmjs.org/express/-/express-4.19.2.tgz",
-      "integrity": "sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==",
+      "version": "4.21.1",
+      "resolved": "https://registry.npmjs.org/express/-/express-4.21.1.tgz",
+      "integrity": "sha512-YSFlK1Ee0/GC8QaO91tHcDxJiE/X4FbpAyQWkxAvG6AXCuR65YzK8ua6D9hvi/TzUfZMpc+BwuM1IPw8fmQBiQ==",
       "dev": true,
       "dependencies": {
         "accepts": "~1.3.8",
         "array-flatten": "1.1.1",
-        "body-parser": "1.20.2",
+        "body-parser": "1.20.3",
         "content-disposition": "0.5.4",
         "content-type": "~1.0.4",
-        "cookie": "0.6.0",
+        "cookie": "0.7.1",
         "cookie-signature": "1.0.6",
         "debug": "2.6.9",
         "depd": "2.0.0",
-        "encodeurl": "~1.0.2",
+        "encodeurl": "~2.0.0",
         "escape-html": "~1.0.3",
         "etag": "~1.8.1",
-        "finalhandler": "1.2.0",
+        "finalhandler": "1.3.1",
         "fresh": "0.5.2",
         "http-errors": "2.0.0",
-        "merge-descriptors": "1.0.1",
+        "merge-descriptors": "1.0.3",
         "methods": "~1.1.2",
         "on-finished": "2.4.1",
         "parseurl": "~1.3.3",
-        "path-to-regexp": "0.1.7",
+        "path-to-regexp": "0.1.10",
         "proxy-addr": "~2.0.7",
-        "qs": "6.11.0",
+        "qs": "6.13.0",
         "range-parser": "~1.2.1",
         "safe-buffer": "5.2.1",
-        "send": "0.18.0",
-        "serve-static": "1.15.0",
+        "send": "0.19.0",
+        "serve-static": "1.16.2",
         "setprototypeof": "1.2.0",
         "statuses": "2.0.1",
         "type-is": "~1.6.18",
@@ -3473,27 +3693,6 @@
       "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==",
       "dev": true
     },
-    "node_modules/fast-fifo": {
-      "version": "1.3.2",
-      "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz",
-      "integrity": "sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ=="
-    },
-    "node_modules/fast-glob": {
-      "version": "3.2.12",
-      "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.2.12.tgz",
-      "integrity": "sha512-DVj4CQIYYow0BlaelwK1pHl5n5cRSJfM60UA0zK891sVInoPri2Ekj7+e1CT3/3qxXenpI+nBBmQAcJPJgaj4w==",
-      "dev": true,
-      "dependencies": {
-        "@nodelib/fs.stat": "^2.0.2",
-        "@nodelib/fs.walk": "^1.2.3",
-        "glob-parent": "^5.1.2",
-        "merge2": "^1.3.0",
-        "micromatch": "^4.0.4"
-      },
-      "engines": {
-        "node": ">=8.6.0"
-      }
-    },
     "node_modules/fast-json-stable-stringify": {
       "version": "2.1.0",
       "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz",
@@ -3509,15 +3708,6 @@
         "node": ">= 4.9.1"
       }
     },
-    "node_modules/fastq": {
-      "version": "1.15.0",
-      "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.15.0.tgz",
-      "integrity": "sha512-wBrocU2LCXXa+lWBt8RoIRD89Fi8OdABODa/kEnyeyjS5aZO5/GNvI5sEINADqP/h8M29UHTHUb53sUu5Ihqdw==",
-      "dev": true,
-      "dependencies": {
-        "reusify": "^1.0.4"
-      }
-    },
     "node_modules/faye-websocket": {
       "version": "0.11.4",
       "resolved": "https://registry.npmjs.org/faye-websocket/-/faye-websocket-0.11.4.tgz",
@@ -3574,13 +3764,13 @@
       }
     },
     "node_modules/finalhandler": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.2.0.tgz",
-      "integrity": "sha512-5uXcUVftlQMFnWC9qu/svkWv3GTd2PfUhK/3PLkYNAe7FbqJMt3515HaxE6eRL74GdsriiwujiawdaB1BpEISg==",
+      "version": "1.3.1",
+      "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-1.3.1.tgz",
+      "integrity": "sha512-6BN9trH7bp3qvnrRyzsBz+g3lZxTNZTbVO2EV1CS0WIcDbawYVdYvGflME/9QP0h0pYlCDBCTjYa9nZzMDpyxQ==",
       "dev": true,
       "dependencies": {
         "debug": "2.6.9",
-        "encodeurl": "~1.0.2",
+        "encodeurl": "~2.0.0",
         "escape-html": "~1.0.3",
         "on-finished": "2.4.1",
         "parseurl": "~1.3.3",
@@ -3650,6 +3840,32 @@
         }
       }
     },
+    "node_modules/foreground-child": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.1.1.tgz",
+      "integrity": "sha512-TMKDUnIte6bfb5nWv7V/caI169OHgvwjb7V4WkeUvbQQdjr5rWKqHFiKWb/fcOwB+CzBT+qbWjvj+DVwRskpIg==",
+      "dependencies": {
+        "cross-spawn": "^7.0.0",
+        "signal-exit": "^4.0.1"
+      },
+      "engines": {
+        "node": ">=14"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
+    },
+    "node_modules/foreground-child/node_modules/signal-exit": {
+      "version": "4.1.0",
+      "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz",
+      "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==",
+      "engines": {
+        "node": ">=14"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
+    },
     "node_modules/forwarded": {
       "version": "0.2.0",
       "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz",
@@ -3668,11 +3884,6 @@
         "node": ">= 0.6"
       }
     },
-    "node_modules/fs-constants": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz",
-      "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow=="
-    },
     "node_modules/fs-monkey": {
       "version": "1.0.3",
       "resolved": "https://registry.npmjs.org/fs-monkey/-/fs-monkey-1.0.3.tgz",
@@ -3695,9 +3906,9 @@
       "dev": true
     },
     "node_modules/fsevents": {
-      "version": "2.3.2",
-      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz",
-      "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==",
+      "version": "2.3.3",
+      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
+      "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
       "dev": true,
       "hasInstallScript": true,
       "optional": true,
@@ -3775,11 +3986,6 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/github-from-package": {
-      "version": "0.0.0",
-      "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz",
-      "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw=="
-    },
     "node_modules/glob": {
       "version": "7.2.3",
       "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
@@ -3827,25 +4033,6 @@
         "node": ">=4"
       }
     },
-    "node_modules/globby": {
-      "version": "13.1.4",
-      "resolved": "https://registry.npmjs.org/globby/-/globby-13.1.4.tgz",
-      "integrity": "sha512-iui/IiiW+QrJ1X1hKH5qwlMQyv34wJAYwH1vrf8b9kBA4sNiif3gKsMHa+BrdnOpEudWjpotfa7LrTzB1ERS/g==",
-      "dev": true,
-      "dependencies": {
-        "dir-glob": "^3.0.1",
-        "fast-glob": "^3.2.11",
-        "ignore": "^5.2.0",
-        "merge2": "^1.4.1",
-        "slash": "^4.0.0"
-      },
-      "engines": {
-        "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/gopd": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz",
@@ -3859,9 +4046,9 @@
       }
     },
     "node_modules/graceful-fs": {
-      "version": "4.2.10",
-      "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.10.tgz",
-      "integrity": "sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==",
+      "version": "4.2.11",
+      "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz",
+      "integrity": "sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==",
       "dev": true
     },
     "node_modules/guid-typescript": {
@@ -4106,34 +4293,6 @@
         "node": ">=0.10.0"
       }
     },
-    "node_modules/ieee754": {
-      "version": "1.2.1",
-      "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
-      "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
-    "node_modules/ignore": {
-      "version": "5.2.4",
-      "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.4.tgz",
-      "integrity": "sha512-MAb38BcSbH0eHNBxn7ql2NH/kX33OkB3lZ1BNdh7ENeRChHTYsTvWrMubiIAMNS2llXEEgZ1MUOBtXChP3kaFQ==",
-      "dev": true,
-      "engines": {
-        "node": ">= 4"
-      }
-    },
     "node_modules/import-local": {
       "version": "3.1.0",
       "resolved": "https://registry.npmjs.org/import-local/-/import-local-3.1.0.tgz",
@@ -4175,12 +4334,8 @@
     "node_modules/inherits": {
       "version": "2.0.4",
       "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
-      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ=="
-    },
-    "node_modules/ini": {
-      "version": "1.3.8",
-      "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz",
-      "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew=="
+      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
+      "dev": true
     },
     "node_modules/interpret": {
       "version": "3.1.1",
@@ -4257,7 +4412,6 @@
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz",
       "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==",
-      "dev": true,
       "engines": {
         "node": ">=8"
       }
@@ -4349,8 +4503,7 @@
     "node_modules/isexe": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
-      "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==",
-      "dev": true
+      "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw=="
     },
     "node_modules/isobject": {
       "version": "3.0.1",
@@ -4386,6 +4539,15 @@
         "node": ">=8"
       }
     },
+    "node_modules/istanbul-lib-instrument/node_modules/semver": {
+      "version": "6.3.1",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz",
+      "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==",
+      "dev": true,
+      "bin": {
+        "semver": "bin/semver.js"
+      }
+    },
     "node_modules/istanbul-lib-report": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/istanbul-lib-report/-/istanbul-lib-report-3.0.0.tgz",
@@ -4462,6 +4624,23 @@
         "node": ">=8"
       }
     },
+    "node_modules/jackspeak": {
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.1.2.tgz",
+      "integrity": "sha512-kWmLKn2tRtfYMF/BakihVVRzBKOxz4gJMiL2Rj91WnAB5TPZumSH99R/Yf1qE1u4uRimvCSJfm6hnxohXeEXjQ==",
+      "dependencies": {
+        "@isaacs/cliui": "^8.0.2"
+      },
+      "engines": {
+        "node": ">=14"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      },
+      "optionalDependencies": {
+        "@pkgjs/parseargs": "^0.11.0"
+      }
+    },
     "node_modules/jest": {
       "version": "29.6.1",
       "resolved": "https://registry.npmjs.org/jest/-/jest-29.6.1.tgz",
@@ -5422,20 +5601,9 @@
       "dev": true
     },
     "node_modules/long": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
-      "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA=="
-    },
-    "node_modules/lru-cache": {
-      "version": "6.0.0",
-      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz",
-      "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==",
-      "dependencies": {
-        "yallist": "^4.0.0"
-      },
-      "engines": {
-        "node": ">=10"
-      }
+      "version": "5.2.3",
+      "resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz",
+      "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q=="
     },
     "node_modules/make-dir": {
       "version": "3.1.0",
@@ -5452,6 +5620,15 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/make-dir/node_modules/semver": {
+      "version": "6.3.1",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz",
+      "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==",
+      "dev": true,
+      "bin": {
+        "semver": "bin/semver.js"
+      }
+    },
     "node_modules/makeerror": {
       "version": "1.0.12",
       "resolved": "https://registry.npmjs.org/makeerror/-/makeerror-1.0.12.tgz",
@@ -5527,10 +5704,13 @@
       }
     },
     "node_modules/merge-descriptors": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz",
-      "integrity": "sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w==",
-      "dev": true
+      "version": "1.0.3",
+      "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz",
+      "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==",
+      "dev": true,
+      "funding": {
+        "url": "https://github.com/sponsors/sindresorhus"
+      }
     },
     "node_modules/merge-stream": {
       "version": "2.0.0",
@@ -5538,15 +5718,6 @@
       "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==",
       "dev": true
     },
-    "node_modules/merge2": {
-      "version": "1.4.1",
-      "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
-      "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==",
-      "dev": true,
-      "engines": {
-        "node": ">= 8"
-      }
-    },
     "node_modules/methods": {
       "version": "1.1.2",
       "resolved": "https://registry.npmjs.org/methods/-/methods-1.1.2.tgz",
@@ -5557,12 +5728,12 @@
       }
     },
     "node_modules/micromatch": {
-      "version": "4.0.5",
-      "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz",
-      "integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==",
+      "version": "4.0.8",
+      "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz",
+      "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==",
       "dev": true,
       "dependencies": {
-        "braces": "^3.0.2",
+        "braces": "^3.0.3",
         "picomatch": "^2.3.1"
       },
       "engines": {
@@ -5611,17 +5782,6 @@
         "node": ">=6"
       }
     },
-    "node_modules/mimic-response": {
-      "version": "3.1.0",
-      "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz",
-      "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==",
-      "engines": {
-        "node": ">=10"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/minimalistic-assert": {
       "version": "1.0.1",
       "resolved": "https://registry.npmjs.org/minimalistic-assert/-/minimalistic-assert-1.0.1.tgz",
@@ -5644,10 +5804,91 @@
       "version": "1.2.8",
       "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
       "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
+      "dev": true,
       "funding": {
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/minipass": {
+      "version": "7.1.2",
+      "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz",
+      "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==",
+      "engines": {
+        "node": ">=16 || 14 >=14.17"
+      }
+    },
+    "node_modules/minizlib": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz",
+      "integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==",
+      "dependencies": {
+        "minipass": "^7.0.4",
+        "rimraf": "^5.0.5"
+      },
+      "engines": {
+        "node": ">= 18"
+      }
+    },
+    "node_modules/minizlib/node_modules/brace-expansion": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz",
+      "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==",
+      "dependencies": {
+        "balanced-match": "^1.0.0"
+      }
+    },
+    "node_modules/minizlib/node_modules/glob": {
+      "version": "10.4.1",
+      "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.1.tgz",
+      "integrity": "sha512-2jelhlq3E4ho74ZyVLN03oKdAZVUa6UDZzFLVH1H7dnoax+y9qyaq8zBkfDIggjniU19z0wU18y16jMB2eyVIw==",
+      "dependencies": {
+        "foreground-child": "^3.1.0",
+        "jackspeak": "^3.1.2",
+        "minimatch": "^9.0.4",
+        "minipass": "^7.1.2",
+        "path-scurry": "^1.11.1"
+      },
+      "bin": {
+        "glob": "dist/esm/bin.mjs"
+      },
+      "engines": {
+        "node": ">=16 || 14 >=14.18"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
+    },
+    "node_modules/minizlib/node_modules/minimatch": {
+      "version": "9.0.4",
+      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.4.tgz",
+      "integrity": "sha512-KqWh+VchfxcMNRAJjj2tnsSJdNbHsVgnkBhTNrW7AjVo6OvLtxw8zfT9oLw1JSohlFzJ8jCoTgaoXvJ+kHt6fw==",
+      "dependencies": {
+        "brace-expansion": "^2.0.1"
+      },
+      "engines": {
+        "node": ">=16 || 14 >=14.17"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
+    },
+    "node_modules/minizlib/node_modules/rimraf": {
+      "version": "5.0.7",
+      "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.7.tgz",
+      "integrity": "sha512-nV6YcJo5wbLW77m+8KjH8aB/7/rxQy9SZ0HY5shnwULfS+9nmTtVXAJET5NdZmCzA4fPI/Hm1wo/Po/4mopOdg==",
+      "dependencies": {
+        "glob": "^10.3.7"
+      },
+      "bin": {
+        "rimraf": "dist/esm/bin.mjs"
+      },
+      "engines": {
+        "node": ">=14.18"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
+    },
     "node_modules/mkdirp": {
       "version": "1.0.4",
       "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-1.0.4.tgz",
@@ -5660,11 +5901,6 @@
         "node": ">=10"
       }
     },
-    "node_modules/mkdirp-classic": {
-      "version": "0.5.3",
-      "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz",
-      "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A=="
-    },
     "node_modules/mkdirp2": {
       "version": "1.0.5",
       "resolved": "https://registry.npmjs.org/mkdirp2/-/mkdirp2-1.0.5.tgz",
@@ -5690,11 +5926,6 @@
         "multicast-dns": "cli.js"
       }
     },
-    "node_modules/napi-build-utils": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-1.0.2.tgz",
-      "integrity": "sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg=="
-    },
     "node_modules/natural-compare": {
       "version": "1.4.0",
       "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz",
@@ -5711,26 +5942,10 @@
       }
     },
     "node_modules/neo-async": {
-      "version": "2.6.2",
-      "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz",
-      "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==",
-      "dev": true
-    },
-    "node_modules/node-abi": {
-      "version": "3.35.0",
-      "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.35.0.tgz",
-      "integrity": "sha512-jAlSOFR1Bls963NmFwxeQkNTzqjUF0NThm8Le7eRIRGzFUVJuMOFZDLv5Y30W/Oaw+KEebEJLAigwO9gQHoEmw==",
-      "dependencies": {
-        "semver": "^7.3.5"
-      },
-      "engines": {
-        "node": ">=10"
-      }
-    },
-    "node_modules/node-addon-api": {
-      "version": "6.1.0",
-      "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-6.1.0.tgz",
-      "integrity": "sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA=="
+      "version": "2.6.2",
+      "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz",
+      "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==",
+      "dev": true
     },
     "node_modules/node-forge": {
       "version": "1.3.1",
@@ -5748,9 +5963,9 @@
       "dev": true
     },
     "node_modules/node-releases": {
-      "version": "2.0.13",
-      "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.13.tgz",
-      "integrity": "sha512-uYr7J37ae/ORWdZeQ1xxMJe3NtdmqMC/JZK+geofDrkLUApKRHPd18/TxtBOJ4A0/+uUIliorNrfYV6s1b02eQ==",
+      "version": "2.0.18",
+      "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.18.tgz",
+      "integrity": "sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g==",
       "dev": true
     },
     "node_modules/normalize-path": {
@@ -5781,10 +5996,13 @@
       "dev": true
     },
     "node_modules/object-inspect": {
-      "version": "1.13.1",
-      "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz",
-      "integrity": "sha512-5qoj1RUiKOMsCCNLV1CBiPYE10sziTsnmNxkAI/rZhiD63CF7IqdFGC/XzjWjpSgLf0LxXX3bDFIh0E18f6UhQ==",
+      "version": "1.13.2",
+      "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.2.tgz",
+      "integrity": "sha512-IRZSRuzJiynemAXPYtPe5BoI/RESNYR7TYm50MC5Mqbd3Jmw5y790sErYw3V6SryFJD64b74qQQs9wn5Bg/k3g==",
       "dev": true,
+      "engines": {
+        "node": ">= 0.4"
+      },
       "funding": {
         "url": "https://github.com/sponsors/ljharb"
       }
@@ -5829,6 +6047,7 @@
       "version": "1.4.0",
       "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
       "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
+      "dev": true,
       "dependencies": {
         "wrappy": "1"
       }
@@ -5848,46 +6067,44 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
-    "node_modules/onnx-proto": {
-      "version": "4.0.4",
-      "resolved": "https://registry.npmjs.org/onnx-proto/-/onnx-proto-4.0.4.tgz",
-      "integrity": "sha512-aldMOB3HRoo6q/phyB6QRQxSt895HNNw82BNyZ2CMh4bjeKv7g/c+VpAFtJuEMVfYLMbRx61hbuqnKceLeDcDA==",
-      "dependencies": {
-        "protobufjs": "^6.8.8"
-      }
-    },
     "node_modules/onnxruntime-common": {
-      "version": "1.14.0",
-      "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.14.0.tgz",
-      "integrity": "sha512-3LJpegM2iMNRX2wUmtYfeX/ytfOzNwAWKSq1HbRrKc9+uqG/FsEA0bbKZl1btQeZaXhC26l44NWpNUeXPII7Ew=="
+      "version": "1.19.2",
+      "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.19.2.tgz",
+      "integrity": "sha512-a4R7wYEVFbZBlp0BfhpbFWqe4opCor3KM+5Wm22Az3NGDcQMiU2hfG/0MfnBs+1ZrlSGmlgWeMcXQkDk1UFb8Q=="
     },
     "node_modules/onnxruntime-node": {
-      "version": "1.14.0",
-      "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.14.0.tgz",
-      "integrity": "sha512-5ba7TWomIV/9b6NH/1x/8QEeowsb+jBEvFzU6z0T4mNsFwdPqXeFUM7uxC6QeSRkEbWu3qEB0VMjrvzN/0S9+w==",
-      "optional": true,
+      "version": "1.19.2",
+      "resolved": "https://registry.npmjs.org/onnxruntime-node/-/onnxruntime-node-1.19.2.tgz",
+      "integrity": "sha512-9eHMP/HKbbeUcqte1JYzaaRC8JPn7ojWeCeoyShO86TOR97OCyIyAIOGX3V95ErjslVhJRXY8Em/caIUc0hm1Q==",
+      "hasInstallScript": true,
       "os": [
         "win32",
         "darwin",
         "linux"
       ],
       "dependencies": {
-        "onnxruntime-common": "~1.14.0"
+        "onnxruntime-common": "1.19.2",
+        "tar": "^7.0.1"
       }
     },
     "node_modules/onnxruntime-web": {
-      "version": "1.14.0",
-      "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.14.0.tgz",
-      "integrity": "sha512-Kcqf43UMfW8mCydVGcX9OMXI2VN17c0p6XvR7IPSZzBf/6lteBzXHvcEVWDPmCKuGombl997HgLqj91F11DzXw==",
+      "version": "1.20.0-dev.20241016-2b8fc5529b",
+      "resolved": "https://registry.npmjs.org/onnxruntime-web/-/onnxruntime-web-1.20.0-dev.20241016-2b8fc5529b.tgz",
+      "integrity": "sha512-1XovqtgqeEFtupuyzdDQo7Tqj4GRyNHzOoXjapCEo4rfH3JrXok5VtqucWfRXHPsOI5qoNxMQ9VE+drDIp6woQ==",
       "dependencies": {
         "flatbuffers": "^1.12.0",
         "guid-typescript": "^1.0.9",
-        "long": "^4.0.0",
-        "onnx-proto": "^4.0.4",
-        "onnxruntime-common": "~1.14.0",
-        "platform": "^1.3.6"
+        "long": "^5.2.3",
+        "onnxruntime-common": "1.20.0-dev.20241016-2b8fc5529b",
+        "platform": "^1.3.6",
+        "protobufjs": "^7.2.4"
       }
     },
+    "node_modules/onnxruntime-web/node_modules/onnxruntime-common": {
+      "version": "1.20.0-dev.20241016-2b8fc5529b",
+      "resolved": "https://registry.npmjs.org/onnxruntime-common/-/onnxruntime-common-1.20.0-dev.20241016-2b8fc5529b.tgz",
+      "integrity": "sha512-KZK8b6zCYGZFjd4ANze0pqBnqnFTS3GIVeclQpa2qseDpXrCQJfkWBixRcrZShNhm3LpFOZ8qJYFC5/qsJK9WQ=="
+    },
     "node_modules/open": {
       "version": "8.4.2",
       "resolved": "https://registry.npmjs.org/open/-/open-8.4.2.tgz",
@@ -6003,7 +6220,6 @@
       "version": "3.1.1",
       "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz",
       "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==",
-      "dev": true,
       "engines": {
         "node": ">=8"
       }
@@ -6014,25 +6230,39 @@
       "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==",
       "dev": true
     },
-    "node_modules/path-to-regexp": {
-      "version": "0.1.7",
-      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.7.tgz",
-      "integrity": "sha512-5DFkuoqlv1uYQKxy8omFBeJPQcdoE07Kv2sferDCrAq1ohOU+MSDswDIbnx3YAM60qIOnYa53wBhXW0EbMonrQ==",
-      "dev": true
+    "node_modules/path-scurry": {
+      "version": "1.11.1",
+      "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz",
+      "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==",
+      "dependencies": {
+        "lru-cache": "^10.2.0",
+        "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0"
+      },
+      "engines": {
+        "node": ">=16 || 14 >=14.18"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
     },
-    "node_modules/path-type": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz",
-      "integrity": "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==",
-      "dev": true,
+    "node_modules/path-scurry/node_modules/lru-cache": {
+      "version": "10.2.2",
+      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.2.2.tgz",
+      "integrity": "sha512-9hp3Vp2/hFQUiIwKo8XCeFVnrg8Pk3TYNPIR7tJADKi5YfcF7vEaK7avFHTlSy3kOKYaJQaalfEo6YuXdceBOQ==",
       "engines": {
-        "node": ">=8"
+        "node": "14 || >=16.14"
       }
     },
+    "node_modules/path-to-regexp": {
+      "version": "0.1.10",
+      "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-0.1.10.tgz",
+      "integrity": "sha512-7lf7qcQidTku0Gu3YDPc8DJ1q7OOucfa/BSsIwjuh56VU7katFvuM8hULfkwB3Fns/rsVF7PwPKVw1sl5KQS9w==",
+      "dev": true
+    },
     "node_modules/picocolors": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.0.tgz",
-      "integrity": "sha512-1fygroTLlHu66zi26VoTDv8yRgm0Fccecssto+MhsZ0D/DGW2sm8E8AjW7NU5VVTRt5GxbeZ5qBuJr+HyLYkjQ==",
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.1.tgz",
+      "integrity": "sha512-anP1Z8qwhkbmu7MFP5iTt+wQKXgwzf7zTyGlcdzabySa9vd0Xt392U0rVmz9poOaBj0uHJKyyo9/upk0HrEQew==",
       "dev": true
     },
     "node_modules/picomatch": {
@@ -6073,29 +6303,19 @@
       "resolved": "https://registry.npmjs.org/platform/-/platform-1.3.6.tgz",
       "integrity": "sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg=="
     },
-    "node_modules/prebuild-install": {
-      "version": "7.1.1",
-      "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.1.tgz",
-      "integrity": "sha512-jAXscXWMcCK8GgCoHOfIr0ODh5ai8mj63L2nWrjuAgXE6tDyYGnx4/8o/rCgU+B4JSyZBKbeZqzhtwtC3ovxjw==",
-      "dependencies": {
-        "detect-libc": "^2.0.0",
-        "expand-template": "^2.0.3",
-        "github-from-package": "0.0.0",
-        "minimist": "^1.2.3",
-        "mkdirp-classic": "^0.5.3",
-        "napi-build-utils": "^1.0.1",
-        "node-abi": "^3.3.0",
-        "pump": "^3.0.0",
-        "rc": "^1.2.7",
-        "simple-get": "^4.0.0",
-        "tar-fs": "^2.0.0",
-        "tunnel-agent": "^0.6.0"
-      },
+    "node_modules/prettier": {
+      "version": "3.3.3",
+      "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.3.3.tgz",
+      "integrity": "sha512-i2tDNA0O5IrMO757lfrdQZCc2jPNDVntV0m/+4whiDfWaTKfMNgR7Qz0NAeGz/nRqF4m5/6CLzbP4/liHt12Ew==",
+      "dev": true,
       "bin": {
-        "prebuild-install": "bin.js"
+        "prettier": "bin/prettier.cjs"
       },
       "engines": {
-        "node": ">=10"
+        "node": ">=14"
+      },
+      "funding": {
+        "url": "https://github.com/prettier/prettier?sponsor=1"
       }
     },
     "node_modules/pretty-format": {
@@ -6166,11 +6386,6 @@
         "node": ">=12.0.0"
       }
     },
-    "node_modules/protobufjs/node_modules/long": {
-      "version": "5.2.3",
-      "resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz",
-      "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q=="
-    },
     "node_modules/proxy-addr": {
       "version": "2.0.7",
       "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz",
@@ -6193,15 +6408,6 @@
         "node": ">= 0.10"
       }
     },
-    "node_modules/pump": {
-      "version": "3.0.0",
-      "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz",
-      "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==",
-      "dependencies": {
-        "end-of-stream": "^1.1.0",
-        "once": "^1.3.1"
-      }
-    },
     "node_modules/punycode": {
       "version": "2.3.0",
       "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz",
@@ -6228,12 +6434,12 @@
       ]
     },
     "node_modules/qs": {
-      "version": "6.11.0",
-      "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.0.tgz",
-      "integrity": "sha512-MvjoMCJwEarSbUYk5O+nmoSzSutSsTwF85zcHPQ9OrlFoZOYIjaqBAJIqIXjptyD5vThxGq52Xu/MaJzRkIk4Q==",
+      "version": "6.13.0",
+      "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz",
+      "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==",
       "dev": true,
       "dependencies": {
-        "side-channel": "^1.0.4"
+        "side-channel": "^1.0.6"
       },
       "engines": {
         "node": ">=0.6"
@@ -6242,31 +6448,6 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/queue-microtask": {
-      "version": "1.2.3",
-      "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz",
-      "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==",
-      "dev": true,
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
-    "node_modules/queue-tick": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz",
-      "integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag=="
-    },
     "node_modules/randombytes": {
       "version": "2.1.0",
       "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz",
@@ -6309,20 +6490,6 @@
         "node": ">= 0.8"
       }
     },
-    "node_modules/rc": {
-      "version": "1.2.8",
-      "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz",
-      "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==",
-      "dependencies": {
-        "deep-extend": "^0.6.0",
-        "ini": "~1.3.0",
-        "minimist": "^1.2.0",
-        "strip-json-comments": "~2.0.1"
-      },
-      "bin": {
-        "rc": "cli.js"
-      }
-    },
     "node_modules/react-is": {
       "version": "18.2.0",
       "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.2.0.tgz",
@@ -6333,6 +6500,7 @@
       "version": "3.6.1",
       "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.1.tgz",
       "integrity": "sha512-+rQmrWMYGA90yenhTYsLWAsLsqVC8osOw6PKE1HDYiO0gdPeKe/xDHNzIAIn4C91YQ6oenEhfYqqc1883qHbjQ==",
+      "dev": true,
       "dependencies": {
         "inherits": "^2.0.3",
         "string_decoder": "^1.1.1",
@@ -6547,16 +6715,6 @@
         "node": ">= 4"
       }
     },
-    "node_modules/reusify": {
-      "version": "1.0.4",
-      "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz",
-      "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==",
-      "dev": true,
-      "engines": {
-        "iojs": ">=1.0.0",
-        "node": ">=0.10.0"
-      }
-    },
     "node_modules/rimraf": {
       "version": "3.0.2",
       "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz",
@@ -6572,33 +6730,11 @@
         "url": "https://github.com/sponsors/isaacs"
       }
     },
-    "node_modules/run-parallel": {
-      "version": "1.2.0",
-      "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz",
-      "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==",
-      "dev": true,
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ],
-      "dependencies": {
-        "queue-microtask": "^1.2.2"
-      }
-    },
     "node_modules/safe-buffer": {
       "version": "5.2.1",
       "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
       "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
+      "dev": true,
       "funding": [
         {
           "type": "github",
@@ -6621,9 +6757,9 @@
       "dev": true
     },
     "node_modules/schema-utils": {
-      "version": "3.1.2",
-      "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.1.2.tgz",
-      "integrity": "sha512-pvjEHOgWc9OWA/f/DE3ohBWTD6EleVLf7iFUkoSwAxttdBhB9QUebQgxER2kWueOvRJXPHNnyrvvh9eZINB8Eg==",
+      "version": "3.3.0",
+      "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.3.0.tgz",
+      "integrity": "sha512-pN/yOAvcC+5rQ5nERGuwrjLlYvLTbCibnZ1I7B1LaiAz9BRBlE9GMgE/eqV30P7aJQUf7Ddimy/RsbYO/GrVGg==",
       "dev": true,
       "dependencies": {
         "@types/json-schema": "^7.0.8",
@@ -6657,12 +6793,10 @@
       }
     },
     "node_modules/semver": {
-      "version": "7.5.4",
-      "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz",
-      "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==",
-      "dependencies": {
-        "lru-cache": "^6.0.0"
-      },
+      "version": "7.6.3",
+      "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",
+      "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==",
+      "license": "ISC",
       "bin": {
         "semver": "bin/semver.js"
       },
@@ -6671,9 +6805,9 @@
       }
     },
     "node_modules/send": {
-      "version": "0.18.0",
-      "resolved": "https://registry.npmjs.org/send/-/send-0.18.0.tgz",
-      "integrity": "sha512-qqWzuOjSFOuqPjFe4NOsMLafToQQwBSOEpS+FwEt3A2V3vKubTquT3vmLTQpFgMXp8AlFWFuP1qKaJZOtPpVXg==",
+      "version": "0.19.0",
+      "resolved": "https://registry.npmjs.org/send/-/send-0.19.0.tgz",
+      "integrity": "sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw==",
       "dev": true,
       "dependencies": {
         "debug": "2.6.9",
@@ -6694,6 +6828,15 @@
         "node": ">= 0.8.0"
       }
     },
+    "node_modules/send/node_modules/encodeurl": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-1.0.2.tgz",
+      "integrity": "sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==",
+      "dev": true,
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
     "node_modules/send/node_modules/ms": {
       "version": "2.1.3",
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
@@ -6701,9 +6844,9 @@
       "dev": true
     },
     "node_modules/serialize-javascript": {
-      "version": "6.0.1",
-      "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.1.tgz",
-      "integrity": "sha512-owoXEFjWRllis8/M1Q+Cw5k8ZH40e3zhp/ovX+Xr/vi1qj6QesbyXXViFbpNvWvPNAD62SutwEXavefrLJWj7w==",
+      "version": "6.0.2",
+      "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz",
+      "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==",
       "dev": true,
       "dependencies": {
         "randombytes": "^2.1.0"
@@ -6773,15 +6916,15 @@
       }
     },
     "node_modules/serve-static": {
-      "version": "1.15.0",
-      "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.15.0.tgz",
-      "integrity": "sha512-XGuRDNjXUijsUL0vl6nSD7cwURuzEgglbOaFuZM9g3kwDXOWVTck0jLzjPzGD+TazWbboZYu52/9/XPdUgne9g==",
+      "version": "1.16.2",
+      "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-1.16.2.tgz",
+      "integrity": "sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw==",
       "dev": true,
       "dependencies": {
-        "encodeurl": "~1.0.2",
+        "encodeurl": "~2.0.0",
         "escape-html": "~1.0.3",
         "parseurl": "~1.3.3",
-        "send": "0.18.0"
+        "send": "0.19.0"
       },
       "engines": {
         "node": ">= 0.8.0"
@@ -6823,52 +6966,47 @@
       }
     },
     "node_modules/sharp": {
-      "version": "0.32.6",
-      "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.32.6.tgz",
-      "integrity": "sha512-KyLTWwgcR9Oe4d9HwCwNM2l7+J0dUQwn/yf7S0EnTtb0eVS4RxO0eUSvxPtzT4F3SY+C4K6fqdv/DO27sJ/v/w==",
+      "version": "0.33.5",
+      "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.33.5.tgz",
+      "integrity": "sha512-haPVm1EkS9pgvHrQ/F3Xy+hgcuMV0Wm9vfIBSiwZ05k+xgb0PkBQpGsAA/oWdDobNaZTH5ppvHtzCFbnSEwHVw==",
       "hasInstallScript": true,
       "dependencies": {
         "color": "^4.2.3",
-        "detect-libc": "^2.0.2",
-        "node-addon-api": "^6.1.0",
-        "prebuild-install": "^7.1.1",
-        "semver": "^7.5.4",
-        "simple-get": "^4.0.1",
-        "tar-fs": "^3.0.4",
-        "tunnel-agent": "^0.6.0"
+        "detect-libc": "^2.0.3",
+        "semver": "^7.6.3"
       },
       "engines": {
-        "node": ">=14.15.0"
+        "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
       },
       "funding": {
         "url": "https://opencollective.com/libvips"
-      }
-    },
-    "node_modules/sharp/node_modules/tar-fs": {
-      "version": "3.0.4",
-      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.4.tgz",
-      "integrity": "sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w==",
-      "dependencies": {
-        "mkdirp-classic": "^0.5.2",
-        "pump": "^3.0.0",
-        "tar-stream": "^3.1.5"
-      }
-    },
-    "node_modules/sharp/node_modules/tar-stream": {
-      "version": "3.1.6",
-      "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.6.tgz",
-      "integrity": "sha512-B/UyjYwPpMBv+PaFSWAmtYjwdrlEaZQEhMIBFNC5oEG8lpiW8XjcSdmEaClj28ArfKScKHs2nshz3k2le6crsg==",
-      "dependencies": {
-        "b4a": "^1.6.4",
-        "fast-fifo": "^1.2.0",
-        "streamx": "^2.15.0"
+      },
+      "optionalDependencies": {
+        "@img/sharp-darwin-arm64": "0.33.5",
+        "@img/sharp-darwin-x64": "0.33.5",
+        "@img/sharp-libvips-darwin-arm64": "1.0.4",
+        "@img/sharp-libvips-darwin-x64": "1.0.4",
+        "@img/sharp-libvips-linux-arm": "1.0.5",
+        "@img/sharp-libvips-linux-arm64": "1.0.4",
+        "@img/sharp-libvips-linux-s390x": "1.0.4",
+        "@img/sharp-libvips-linux-x64": "1.0.4",
+        "@img/sharp-libvips-linuxmusl-arm64": "1.0.4",
+        "@img/sharp-libvips-linuxmusl-x64": "1.0.4",
+        "@img/sharp-linux-arm": "0.33.5",
+        "@img/sharp-linux-arm64": "0.33.5",
+        "@img/sharp-linux-s390x": "0.33.5",
+        "@img/sharp-linux-x64": "0.33.5",
+        "@img/sharp-linuxmusl-arm64": "0.33.5",
+        "@img/sharp-linuxmusl-x64": "0.33.5",
+        "@img/sharp-wasm32": "0.33.5",
+        "@img/sharp-win32-ia32": "0.33.5",
+        "@img/sharp-win32-x64": "0.33.5"
       }
     },
     "node_modules/shebang-command": {
       "version": "2.0.0",
       "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz",
       "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==",
-      "dev": true,
       "dependencies": {
         "shebang-regex": "^3.0.0"
       },
@@ -6880,7 +7018,6 @@
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz",
       "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==",
-      "dev": true,
       "engines": {
         "node": ">=8"
       }
@@ -6918,49 +7055,6 @@
       "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==",
       "dev": true
     },
-    "node_modules/simple-concat": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz",
-      "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ]
-    },
-    "node_modules/simple-get": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz",
-      "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==",
-      "funding": [
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/feross"
-        },
-        {
-          "type": "patreon",
-          "url": "https://www.patreon.com/feross"
-        },
-        {
-          "type": "consulting",
-          "url": "https://feross.org/support"
-        }
-      ],
-      "dependencies": {
-        "decompress-response": "^6.0.0",
-        "once": "^1.3.1",
-        "simple-concat": "^1.0.0"
-      }
-    },
     "node_modules/simple-swizzle": {
       "version": "0.2.2",
       "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
@@ -6975,18 +7069,6 @@
       "integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==",
       "dev": true
     },
-    "node_modules/slash": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/slash/-/slash-4.0.0.tgz",
-      "integrity": "sha512-3dOsAHXXUkQTpOYcoAxLIorMTp4gIQr5IW3iVb7A7lFIp0VHhnynm9izx6TssdrIcVIESAlVjtnO2K8bg+Coew==",
-      "dev": true,
-      "engines": {
-        "node": ">=12"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/sindresorhus"
-      }
-    },
     "node_modules/sockjs": {
       "version": "0.3.24",
       "resolved": "https://registry.npmjs.org/sockjs/-/sockjs-0.3.24.tgz",
@@ -7184,19 +7266,11 @@
         "node": ">=0.10.0"
       }
     },
-    "node_modules/streamx": {
-      "version": "2.15.5",
-      "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.15.5.tgz",
-      "integrity": "sha512-9thPGMkKC2GctCzyCUjME3yR03x2xNo0GPKGkRw2UMYN+gqWa9uqpyNWhmsNCutU5zHmkUum0LsCRQTXUgUCAg==",
-      "dependencies": {
-        "fast-fifo": "^1.1.0",
-        "queue-tick": "^1.0.1"
-      }
-    },
     "node_modules/string_decoder": {
       "version": "1.3.0",
       "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz",
       "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==",
+      "dev": true,
       "dependencies": {
         "safe-buffer": "~5.2.0"
       }
@@ -7218,7 +7292,20 @@
       "version": "4.2.3",
       "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
       "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
-      "dev": true,
+      "dependencies": {
+        "emoji-regex": "^8.0.0",
+        "is-fullwidth-code-point": "^3.0.0",
+        "strip-ansi": "^6.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/string-width-cjs": {
+      "name": "string-width",
+      "version": "4.2.3",
+      "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz",
+      "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==",
       "dependencies": {
         "emoji-regex": "^8.0.0",
         "is-fullwidth-code-point": "^3.0.0",
@@ -7232,7 +7319,18 @@
       "version": "6.0.1",
       "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
       "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
-      "dev": true,
+      "dependencies": {
+        "ansi-regex": "^5.0.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
+    "node_modules/strip-ansi-cjs": {
+      "name": "strip-ansi",
+      "version": "6.0.1",
+      "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz",
+      "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==",
       "dependencies": {
         "ansi-regex": "^5.0.1"
       },
@@ -7258,14 +7356,6 @@
         "node": ">=6"
       }
     },
-    "node_modules/strip-json-comments": {
-      "version": "2.0.1",
-      "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz",
-      "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
     "node_modules/supports-color": {
       "version": "8.1.1",
       "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz",
@@ -7330,30 +7420,42 @@
         "node": ">=6"
       }
     },
-    "node_modules/tar-fs": {
-      "version": "2.1.1",
-      "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.1.tgz",
-      "integrity": "sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==",
+    "node_modules/tar": {
+      "version": "7.2.0",
+      "resolved": "https://registry.npmjs.org/tar/-/tar-7.2.0.tgz",
+      "integrity": "sha512-hctwP0Nb4AB60bj8WQgRYaMOuJYRAPMGiQUAotms5igN8ppfQM+IvjQ5HcKu1MaZh2Wy2KWVTe563Yj8dfc14w==",
       "dependencies": {
-        "chownr": "^1.1.1",
-        "mkdirp-classic": "^0.5.2",
-        "pump": "^3.0.0",
-        "tar-stream": "^2.1.4"
+        "@isaacs/fs-minipass": "^4.0.0",
+        "chownr": "^3.0.0",
+        "minipass": "^7.1.0",
+        "minizlib": "^3.0.1",
+        "mkdirp": "^3.0.1",
+        "yallist": "^5.0.0"
+      },
+      "engines": {
+        "node": ">=18"
       }
     },
-    "node_modules/tar-stream": {
-      "version": "2.2.0",
-      "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz",
-      "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==",
-      "dependencies": {
-        "bl": "^4.0.3",
-        "end-of-stream": "^1.4.1",
-        "fs-constants": "^1.0.0",
-        "inherits": "^2.0.3",
-        "readable-stream": "^3.1.1"
+    "node_modules/tar/node_modules/mkdirp": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz",
+      "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==",
+      "bin": {
+        "mkdirp": "dist/cjs/src/bin.js"
       },
       "engines": {
-        "node": ">=6"
+        "node": ">=10"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
+      }
+    },
+    "node_modules/tar/node_modules/yallist": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz",
+      "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==",
+      "engines": {
+        "node": ">=18"
       }
     },
     "node_modules/temp-path": {
@@ -7363,13 +7465,13 @@
       "dev": true
     },
     "node_modules/terser": {
-      "version": "5.17.1",
-      "resolved": "https://registry.npmjs.org/terser/-/terser-5.17.1.tgz",
-      "integrity": "sha512-hVl35zClmpisy6oaoKALOpS0rDYLxRFLHhRuDlEGTKey9qHjS1w9GMORjuwIMt70Wan4lwsLYyWDVnWgF+KUEw==",
+      "version": "5.31.6",
+      "resolved": "https://registry.npmjs.org/terser/-/terser-5.31.6.tgz",
+      "integrity": "sha512-PQ4DAriWzKj+qgehQ7LK5bQqCFNMmlhjR2PFFLuqGCpuCAauxemVBWwWOxo3UIwWQx8+Pr61Df++r76wDmkQBg==",
       "dev": true,
       "dependencies": {
-        "@jridgewell/source-map": "^0.3.2",
-        "acorn": "^8.5.0",
+        "@jridgewell/source-map": "^0.3.3",
+        "acorn": "^8.8.2",
         "commander": "^2.20.0",
         "source-map-support": "~0.5.20"
       },
@@ -7381,16 +7483,16 @@
       }
     },
     "node_modules/terser-webpack-plugin": {
-      "version": "5.3.7",
-      "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-5.3.7.tgz",
-      "integrity": "sha512-AfKwIktyP7Cu50xNjXF/6Qb5lBNzYaWpU6YfoX3uZicTx0zTy0stDDCsvjDapKsSDvOeWo5MEq4TmdBy2cNoHw==",
+      "version": "5.3.10",
+      "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-5.3.10.tgz",
+      "integrity": "sha512-BKFPWlPDndPs+NGGCr1U59t0XScL5317Y0UReNrHaw9/FwhPENlq6bfgs+4yPfyP51vqC1bQ4rp1EfXW5ZSH9w==",
       "dev": true,
       "dependencies": {
-        "@jridgewell/trace-mapping": "^0.3.17",
+        "@jridgewell/trace-mapping": "^0.3.20",
         "jest-worker": "^27.4.5",
         "schema-utils": "^3.1.1",
         "serialize-javascript": "^6.0.1",
-        "terser": "^5.16.5"
+        "terser": "^5.26.0"
       },
       "engines": {
         "node": ">= 10.13.0"
@@ -7495,16 +7597,11 @@
         "node": ">=0.6"
       }
     },
-    "node_modules/tunnel-agent": {
-      "version": "0.6.0",
-      "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz",
-      "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==",
-      "dependencies": {
-        "safe-buffer": "^5.0.1"
-      },
-      "engines": {
-        "node": "*"
-      }
+    "node_modules/tslib": {
+      "version": "2.6.3",
+      "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.3.tgz",
+      "integrity": "sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ==",
+      "optional": true
     },
     "node_modules/type-detect": {
       "version": "4.0.8",
@@ -7594,9 +7691,9 @@
       }
     },
     "node_modules/update-browserslist-db": {
-      "version": "1.0.11",
-      "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.11.tgz",
-      "integrity": "sha512-dCwEFf0/oT85M1fHBg4F0jtLwJrutGoHSQXCh7u4o2t1drG+c0a9Flnqww6XUKSfQMPpJBRjU8d4RXB09qtvaA==",
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.1.0.tgz",
+      "integrity": "sha512-EdRAaAyk2cUE1wOf2DkEhzxqOQvFOoRJFNS6NeyJ01Gp2beMRpBAINjM2iDXE3KCuKhwnvHIQCJm6ThL2Z+HzQ==",
       "dev": true,
       "funding": [
         {
@@ -7613,8 +7710,8 @@
         }
       ],
       "dependencies": {
-        "escalade": "^3.1.1",
-        "picocolors": "^1.0.0"
+        "escalade": "^3.1.2",
+        "picocolors": "^1.0.1"
       },
       "bin": {
         "update-browserslist-db": "cli.js"
@@ -7635,7 +7732,8 @@
     "node_modules/util-deprecate": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
-      "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw=="
+      "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==",
+      "dev": true
     },
     "node_modules/utils-merge": {
       "version": "1.0.1",
@@ -7703,9 +7801,9 @@
       }
     },
     "node_modules/watchpack": {
-      "version": "2.4.0",
-      "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.0.tgz",
-      "integrity": "sha512-Lcvm7MGST/4fup+ifyKi2hjyIAwcdI4HRgtvTpIUxBRhB+RFtUh8XtDOxUfctVCnhVi+QQj49i91OyvzkJl6cg==",
+      "version": "2.4.2",
+      "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.2.tgz",
+      "integrity": "sha512-TnbFSbcOCcDgjZ4piURLCbJ3nJhznVh9kw6F6iokjiFPl8ONxe9A6nMDVXDiNbrSfLILs6vB07F7wLBrwPYzJw==",
       "dev": true,
       "dependencies": {
         "glob-to-regexp": "^0.4.1",
@@ -7737,34 +7835,33 @@
       }
     },
     "node_modules/webpack": {
-      "version": "5.80.0",
-      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.80.0.tgz",
-      "integrity": "sha512-OIMiq37XK1rWO8mH9ssfFKZsXg4n6klTEDL7S8/HqbAOBBaiy8ABvXvz0dDCXeEF9gqwxSvVk611zFPjS8hJxA==",
+      "version": "5.94.0",
+      "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.94.0.tgz",
+      "integrity": "sha512-KcsGn50VT+06JH/iunZJedYGUJS5FGjow8wb9c0v5n1Om8O1g4L6LjtfxwlXIATopoQu+vOXXa7gYisWxCoPyg==",
       "dev": true,
       "dependencies": {
-        "@types/eslint-scope": "^3.7.3",
-        "@types/estree": "^1.0.0",
-        "@webassemblyjs/ast": "^1.11.5",
-        "@webassemblyjs/wasm-edit": "^1.11.5",
-        "@webassemblyjs/wasm-parser": "^1.11.5",
+        "@types/estree": "^1.0.5",
+        "@webassemblyjs/ast": "^1.12.1",
+        "@webassemblyjs/wasm-edit": "^1.12.1",
+        "@webassemblyjs/wasm-parser": "^1.12.1",
         "acorn": "^8.7.1",
-        "acorn-import-assertions": "^1.7.6",
-        "browserslist": "^4.14.5",
+        "acorn-import-attributes": "^1.9.5",
+        "browserslist": "^4.21.10",
         "chrome-trace-event": "^1.0.2",
-        "enhanced-resolve": "^5.13.0",
+        "enhanced-resolve": "^5.17.1",
         "es-module-lexer": "^1.2.1",
         "eslint-scope": "5.1.1",
         "events": "^3.2.0",
         "glob-to-regexp": "^0.4.1",
-        "graceful-fs": "^4.2.9",
+        "graceful-fs": "^4.2.11",
         "json-parse-even-better-errors": "^2.3.1",
         "loader-runner": "^4.2.0",
         "mime-types": "^2.1.27",
         "neo-async": "^2.6.2",
-        "schema-utils": "^3.1.2",
+        "schema-utils": "^3.2.0",
         "tapable": "^2.1.1",
-        "terser-webpack-plugin": "^5.3.7",
-        "watchpack": "^2.4.0",
+        "terser-webpack-plugin": "^5.3.10",
+        "watchpack": "^2.4.1",
         "webpack-sources": "^3.2.3"
       },
       "bin": {
@@ -8074,7 +8171,6 @@
       "version": "2.0.2",
       "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
       "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==",
-      "dev": true,
       "dependencies": {
         "isexe": "^2.0.0"
       },
@@ -8136,10 +8232,28 @@
         "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
       }
     },
+    "node_modules/wrap-ansi-cjs": {
+      "name": "wrap-ansi",
+      "version": "7.0.0",
+      "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
+      "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==",
+      "dependencies": {
+        "ansi-styles": "^4.0.0",
+        "string-width": "^4.1.0",
+        "strip-ansi": "^6.0.0"
+      },
+      "engines": {
+        "node": ">=10"
+      },
+      "funding": {
+        "url": "https://github.com/chalk/wrap-ansi?sponsor=1"
+      }
+    },
     "node_modules/wrappy": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
-      "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ=="
+      "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
+      "dev": true
     },
     "node_modules/write-file-atomic": {
       "version": "4.0.2",
@@ -8190,11 +8304,6 @@
         "node": ">=10"
       }
     },
-    "node_modules/yallist": {
-      "version": "4.0.0",
-      "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",
-      "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A=="
-    },
     "node_modules/yargs": {
       "version": "17.7.2",
       "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz",
diff --git a/package.json b/package.json
index 224682fb9..cb64c59e7 100644
--- a/package.json
+++ b/package.json
@@ -1,24 +1,47 @@
 {
-  "name": "@xenova/transformers",
-  "version": "2.17.2",
+  "name": "@huggingface/transformers",
+  "version": "3.0.0",
   "description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
   "main": "./src/transformers.js",
   "types": "./types/transformers.d.ts",
   "type": "module",
+  "exports": {
+    "node": {
+      "import": {
+        "types": "./types/transformers.d.ts",
+        "default": "./dist/transformers.mjs"
+      },
+      "require": {
+        "types": "./types/transformers.d.ts",
+        "default": "./dist/transformers.cjs"
+      }
+    },
+    "default": {
+      "types": "./types/transformers.d.ts",
+      "default": "./dist/transformers.js"
+    }
+  },
+  "imports": {
+    "#onnxruntime-webgpu": {
+      "node": "onnxruntime-web",
+      "default": "onnxruntime-web/webgpu"
+    }
+  },
   "scripts": {
+    "format": "prettier --write .",
+    "format:check": "prettier --check .",
     "typegen": "tsc ./src/transformers.js --allowJs --declaration --emitDeclarationOnly --declarationMap --outDir types",
     "dev": "webpack serve --no-client-overlay",
     "build": "webpack && npm run typegen",
-    "generate-tests": "python -m tests.generate_tests",
-    "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --verbose --maxConcurrency 1",
+    "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --verbose",
     "readme": "python ./docs/scripts/build_readme.py",
     "docs-api": "node ./docs/scripts/generate.js",
     "docs-preview": "doc-builder preview transformers.js ./docs/source/ --not_python_module",
-    "docs-build": "doc-builder build transformers.js ./docs/source/ --not_python_module --build_dir ./docs/build/ --repo_owner xenova"
+    "docs-build": "doc-builder build transformers.js ./docs/source/ --not_python_module --build_dir ./docs/build/"
   },
   "repository": {
     "type": "git",
-    "url": "git+https://github.com/xenova/transformers.js.git"
+    "url": "git+https://github.com/huggingface/transformers.js.git"
   },
   "keywords": [
     "transformers",
@@ -31,37 +54,32 @@
     "AI",
     "ML"
   ],
-  "author": "Xenova",
+  "author": "Hugging Face",
   "license": "Apache-2.0",
   "bugs": {
-    "url": "https://github.com/xenova/transformers.js/issues"
+    "url": "https://github.com/huggingface/transformers.js/issues"
   },
-  "homepage": "https://github.com/xenova/transformers.js#readme",
+  "homepage": "https://github.com/huggingface/transformers.js#readme",
   "dependencies": {
-    "onnxruntime-web": "1.14.0",
-    "sharp": "^0.32.0",
-    "@huggingface/jinja": "^0.2.2"
-  },
-  "optionalDependencies": {
-    "onnxruntime-node": "1.14.0"
+    "@huggingface/jinja": "^0.3.0",
+    "onnxruntime-node": "1.19.2",
+    "onnxruntime-web": "1.20.0-dev.20241016-2b8fc5529b",
+    "sharp": "^0.33.5"
   },
   "devDependencies": {
     "@types/jest": "^29.5.1",
+    "@webgpu/types": "^0.1.44",
     "catharsis": "github:xenova/catharsis",
-    "copy-webpack-plugin": "^11.0.0",
     "jest": "^29.5.0",
     "jest-environment-node": "^29.5.0",
     "jsdoc-to-markdown": "^8.0.1",
+    "prettier": "3.3.3",
     "typescript": "^5.2.2",
     "wavefile": "^11.0.0",
     "webpack": "^5.80.0",
     "webpack-cli": "^5.0.2",
     "webpack-dev-server": "^4.13.3"
   },
-  "overrides": {
-    "semver": "^7.5.4",
-    "protobufjs": "^7.2.6"
-  },
   "files": [
     "src",
     "dist",
diff --git a/scripts/convert.py b/scripts/convert.py
index 3a2b223e8..bf9265e48 100644
--- a/scripts/convert.py
+++ b/scripts/convert.py
@@ -2,9 +2,9 @@
 import json
 import os
 import shutil
-from dataclasses import dataclass, field
-from typing import Optional, Set
-from tqdm import tqdm
+from dataclasses import dataclass, field, asdict
+from typing import Optional
+from enum import Enum
 
 from transformers import (
     AutoConfig,
@@ -12,117 +12,46 @@
     HfArgumentParser
 )
 
-import onnx
+import onnxslim
 from optimum.exporters.onnx import main_export, export_models
+from optimum.onnx.graph_transformations import check_and_save_model
 from optimum.exporters.tasks import TasksManager
-from onnxruntime.quantization import (
-    quantize_dynamic,
-    QuantType
-)
 
-DEFAULT_QUANTIZE_PARAMS = {
-    'per_channel': True,
-    'reduce_range': True,
-}
+from .quantize import QuantizationArguments, quantize
 
-MODEL_SPECIFIC_QUANTIZE_PARAMS = {
+NO_PER_CHANNEL_REDUCE_RANGE_MODELS = {
     # Decoder-only models
-    'codegen': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'gpt2': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'gpt_bigcode': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'gptj': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'gpt-neo': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'gpt-neox': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'mpt': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'bloom': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'llama': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'opt': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'mistral': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'falcon': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'phi': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'qwen2': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'stablelm': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'starcoder2': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
+    'codegen',
+    'gpt2',
+    'gpt_bigcode',
+    'gptj',
+    'gpt-neo',
+    'gpt-neox',
+    'mpt',
+    'bloom',
+    'llama',
+    'gemma',
+    'opt',
+    'mistral',
+    'falcon',
+    'phi',
+    'phi3',
+    'qwen2',
+    'stablelm',
+    'starcoder2',
+    'openelm',
+    'gemma',
 
     # Encoder-decoder models
-    'whisper': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'vision-encoder-decoder': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
+    'whisper',
+    'vision-encoder-decoder',
 
     # Encoder-only models
-    'owlv2': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'wavlm': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'wav2vec2': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'unispeech': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
-    'unispeech-sat': {
-        'per_channel': False,
-        'reduce_range': False,
-    },
+    'owlv2',
+    'wavlm',
+    'wav2vec2',
+    'unispeech',
+    'unispeech-sat',
 }
 
 MODELS_WITHOUT_TOKENIZERS = [
@@ -135,6 +64,16 @@
 ]
 
 
+class QuantMode(Enum):
+    # F32 = 'fp32'
+    FP16 = 'fp16'
+    Q8 = 'q8'
+    QI8 = 'int8'
+    QU8 = 'uint8'
+    Q4 = 'q4'
+    BNB4 = 'bnb4'
+
+
 @dataclass
 class ConversionArguments:
     """
@@ -174,7 +113,22 @@ class ConversionArguments:
             )
         }
     )
+    library_name: Optional[str] = field(
+        default=None,
+        metadata={
+            "help": (
+                "The library name to use for the export. If not specified, the library name will be auto-inferred based on the model."
+            )
+        }
+    )
 
+
+    variant: Optional[str] = field(
+        default='default',
+        metadata={
+            "help": "The variant of the ONNX export to use."
+        }
+    )
     opset: int = field(
         default=None,
         metadata={
@@ -197,19 +151,6 @@ class ConversionArguments:
         }
     )
 
-    per_channel: bool = field(
-        default=None,
-        metadata={
-            "help": "Whether to quantize weights per channel"
-        }
-    )
-    reduce_range: bool = field(
-        default=None,
-        metadata={
-            "help": "Whether to quantize weights with 7-bits. It may improve the accuracy for some models running on non-VNNI machine, especially for per-channel mode"
-        }
-    )
-
     output_attentions: bool = field(
         default=False,
         metadata={
@@ -239,90 +180,19 @@ class ConversionArguments:
             "that desire a finer-grained control on the export."
         }
     )
-
-
-def get_operators(model: onnx.ModelProto) -> Set[str]:
-    operators = set()
-
-    def traverse_graph(graph):
-        for node in graph.node:
-            operators.add(node.op_type)
-            for attr in node.attribute:
-                if attr.type == onnx.AttributeProto.GRAPH:
-                    subgraph = attr.g
-                    traverse_graph(subgraph)
-
-    traverse_graph(model.graph)
-    return operators
-
-
-def quantize(model_names_or_paths, **quantize_kwargs):
-    """
-    Quantize the weights of the model from float32 to int8 to allow very efficient inference on modern CPU
-
-    Uses unsigned ints for activation values, signed ints for weights, per
-    https://onnxruntime.ai/docs/performance/quantization.html#data-type-selection
-    it is faster on most CPU architectures
-    Args:
-        onnx_model_path: Path to location the exported ONNX model is stored
-    Returns: The Path generated for the quantized
-    """
-
-    quantize_config = dict(
-        **quantize_kwargs,
-        per_model_config={}
+    skip_onnxslim: bool = field(
+        default=False,
+        metadata={
+            "help": "Whether or not to skip onnxslim."
+        }
     )
 
-    for model in tqdm(model_names_or_paths, desc='Quantizing'):
-        directory_path = os.path.dirname(model)
-        file_name_without_extension = os.path.splitext(
-            os.path.basename(model))[0]
-
-        # NOTE:
-        # As of 2023/04/20, the current latest version of onnxruntime-web is 1.14.0, and does not support INT8 weights for Conv layers.
-        # For this reason, we choose model weight types to ensure compatibility with onnxruntime-web.
-        #
-        # As per docs, signed weight type (QInt8) is faster on most CPUs, so, we use that unless the model contains a Conv layer.
-        # For more information, see:
-        #  - https://github.com/microsoft/onnxruntime/issues/3130#issuecomment-1105200621
-        #  - https://github.com/microsoft/onnxruntime/issues/2339
-
-        loaded_model = onnx.load_model(model)
-        op_types = get_operators(loaded_model)
-        weight_type = QuantType.QUInt8 if 'Conv' in op_types else QuantType.QInt8
-
-        quantize_dynamic(
-            model_input=model,
-            model_output=os.path.join(
-                directory_path, f'{file_name_without_extension}_quantized.onnx'),
-
-            weight_type=weight_type,
-            optimize_model=False,
-
-            # TODO allow user to specify these
-            # op_types_to_quantize=['MatMul', 'Add', 'Conv'],
-            extra_options=dict(
-                EnableSubgraph=True
-            ),
-            **quantize_kwargs
-        )
-
-        quantize_config['per_model_config'][file_name_without_extension] = dict(
-            op_types=list(op_types),
-            weight_type=str(weight_type),
-        )
-
-    # Save quantization config
-    with open(os.path.join(directory_path, 'quantize_config.json'), 'w') as fp:
-        json.dump(quantize_config, fp, indent=4)
-
-
 def main():
 
     parser = HfArgumentParser(
-        (ConversionArguments, )
+        (ConversionArguments, QuantizationArguments)
     )
-    conv_args, = parser.parse_args_into_dataclasses()
+    conv_args, quantization_args = parser.parse_args_into_dataclasses()
 
     model_id = conv_args.model_id
     tokenizer_id = conv_args.tokenizer_id or model_id
@@ -339,30 +209,38 @@ def main():
     # Saving the model config
     config = AutoConfig.from_pretrained(model_id, **from_pretrained_kwargs)
 
-    custom_kwargs={}
+    custom_kwargs = {}
     if conv_args.custom_onnx_configs is not None:
         if conv_args.task == 'auto':
-            raise Exception('`--task` must be set when exporting with `--custom_onnx_configs`')
+            raise Exception(
+                '`--task` must be set when exporting with `--custom_onnx_configs`')
         custom_onnx_configs = json.loads(conv_args.custom_onnx_configs)
 
         for key in custom_onnx_configs:
             onnx_configs = TasksManager._SUPPORTED_MODEL_TYPE[custom_onnx_configs[key]]['onnx']
             mapping = onnx_configs[conv_args.task]
-            custom_onnx_configs[key] = mapping.func(config, **mapping.keywords)
+            new_kwargs = {}
+            if conv_args.task.startswith('text-generation'):
+                new_kwargs['use_past_in_inputs'] = True
+
+            custom_onnx_configs[key] = mapping.func(
+                config, **mapping.keywords, **new_kwargs)
 
         custom_kwargs['custom_onnx_configs'] = custom_onnx_configs
 
     tokenizer = None
     try:
         # Load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, **from_pretrained_kwargs)
+        tokenizer = AutoTokenizer.from_pretrained(
+            tokenizer_id, **from_pretrained_kwargs)
 
         # To avoid inserting all chat templates into tokenizers.js, we save the chat template
         # to the tokenizer_config.json file, and load it when the tokenizer is loaded.
         if getattr(tokenizer, 'chat_template', None) is None and \
-            getattr(tokenizer, 'use_default_system_prompt', False):
+                getattr(tokenizer, 'use_default_system_prompt', False):
             # No chat template specified, and we use the default
-            setattr(tokenizer, 'chat_template', tokenizer.default_chat_template)
+            setattr(tokenizer, 'chat_template',
+                    tokenizer.default_chat_template)
 
     except KeyError:
         pass  # No Tokenizer
@@ -383,7 +261,8 @@ def main():
         output=output_model_folder,
         task=conv_args.task,
         do_validation=not conv_args.skip_validation,
-        library_name='transformers',
+        _variant=conv_args.variant,
+        library_name=conv_args.library_name,
         **core_export_kwargs,
     )
 
@@ -398,7 +277,8 @@ def main():
     elif config.model_type == 'esm':
         from .extra.esm import generate_fast_tokenizer
         fast_tokenizer = generate_fast_tokenizer(tokenizer)
-        fast_tokenizer.save(os.path.join(output_model_folder, 'tokenizer.json'))
+        fast_tokenizer.save(os.path.join(
+            output_model_folder, 'tokenizer.json'))
 
     elif config.model_type == 'whisper':
         if conv_args.output_attentions:
@@ -408,14 +288,14 @@ def main():
                 **get_main_export_kwargs(config, "automatic-speech-recognition")
             )
 
-    elif config.model_type in ('wav2vec2', 'wav2vec2-bert', 'hubert', 'unispeech' , 'unispeech-sat'):
+    elif config.model_type in ('wav2vec2', 'wav2vec2-bert', 'hubert', 'unispeech', 'unispeech-sat'):
         if tokenizer is not None:
             from .extra.wav2vec2 import generate_tokenizer_json
             tokenizer_json = generate_tokenizer_json(tokenizer)
 
             with open(os.path.join(output_model_folder, 'tokenizer.json'), 'w', encoding='utf-8') as fp:
                 json.dump(tokenizer_json, fp, indent=4)
-    
+
     elif config.model_type == 'vits':
         if tokenizer is not None:
             from .extra.vits import generate_tokenizer_json
@@ -423,10 +303,11 @@ def main():
 
             with open(os.path.join(output_model_folder, 'tokenizer.json'), 'w', encoding='utf-8') as fp:
                 json.dump(tokenizer_json, fp, indent=4)
-    
+
     elif config.model_type == 'speecht5':
         # TODO allow user to specify vocoder path
-        export_kwargs["model_kwargs"] = {"vocoder": "microsoft/speecht5_hifigan"}
+        export_kwargs["model_kwargs"] = {
+            "vocoder": "microsoft/speecht5_hifigan"}
 
         if tokenizer is not None:
             from .extra.speecht5 import generate_tokenizer_json
@@ -440,6 +321,26 @@ def main():
         # For more information, see https://github.com/huggingface/optimum/blob/e3b7efb1257c011db907ef40ab340e795cc5684c/optimum/exporters/onnx/model_configs.py#L1028-L1032
         export_kwargs['batch_size'] = 1
 
+    elif config.model_type == 'openelm':
+        from .extra.openelm import OpenElmOnnxConfig
+
+        config = AutoConfig.from_pretrained(
+            model_id, trust_remote_code=conv_args.trust_remote_code)
+
+        onnx_config = OpenElmOnnxConfig(
+            config=config,
+            task="text-generation",
+            use_past=True,
+            use_past_in_inputs=True,
+        )
+
+        custom_onnx_configs = {
+            "model": onnx_config,
+        }
+
+        export_kwargs['task'] = "text-generation-with-past"
+        export_kwargs['custom_onnx_configs'] = custom_onnx_configs
+
     else:
         pass  # TODO
 
@@ -457,8 +358,10 @@ def main():
             from .extra.clip import CLIPTextModelWithProjectionOnnxConfig, CLIPVisionModelWithProjectionOnnxConfig
             from transformers.models.clip import CLIPTextModelWithProjection, CLIPVisionModelWithProjection
 
-            text_model = CLIPTextModelWithProjection.from_pretrained(model_id, **from_pretrained_kwargs)
-            vision_model = CLIPVisionModelWithProjection.from_pretrained(model_id, **from_pretrained_kwargs)
+            text_model = CLIPTextModelWithProjection.from_pretrained(
+                model_id, **from_pretrained_kwargs)
+            vision_model = CLIPVisionModelWithProjection.from_pretrained(
+                model_id, **from_pretrained_kwargs)
 
             export_models(
                 models_and_onnx_configs={
@@ -473,8 +376,10 @@ def main():
             from .extra.siglip import SiglipTextModelOnnxConfig, SiglipVisionModelOnnxConfig
             from transformers.models.siglip import SiglipTextModel, SiglipVisionModel
 
-            text_model = SiglipTextModel.from_pretrained(model_id, **from_pretrained_kwargs)
-            vision_model = SiglipVisionModel.from_pretrained(model_id, **from_pretrained_kwargs)
+            text_model = SiglipTextModel.from_pretrained(
+                model_id, **from_pretrained_kwargs)
+            vision_model = SiglipVisionModel.from_pretrained(
+                model_id, **from_pretrained_kwargs)
 
             export_models(
                 models_and_onnx_configs={
@@ -500,32 +405,43 @@ def main():
         #         },
         #         **custom_export_kwargs,
         #     )
-
         else:
-            raise Exception(f'Unable to export {config.model_type} model with `--split_modalities`.')
+            raise Exception(
+                f'Unable to export {config.model_type} model with `--split_modalities`.')
+
+    os.makedirs(os.path.join(output_model_folder, 'onnx'), exist_ok=True)
 
+    if not conv_args.skip_onnxslim:
+        onnx_models = [os.path.join(output_model_folder, x)
+                    for x in os.listdir(output_model_folder) if x.endswith('.onnx')]
+
+        for model in onnx_models:
+            try:
+                slimmed_model = onnxslim.slim(model)
+                check_and_save_model(slimmed_model, model)
+            except Exception as e:
+                print(f"Failed to slim {model}: {e}")
 
     # Step 2. (optional, recommended) quantize the converted model for fast inference and to reduce model size.
     if conv_args.quantize:
-        # Update quantize config with model specific defaults
-        quantize_config = MODEL_SPECIFIC_QUANTIZE_PARAMS.get(
-            config.model_type, DEFAULT_QUANTIZE_PARAMS)
 
-        # Update if user specified values
-        if conv_args.per_channel is not None:
-            quantize_config['per_channel'] = conv_args.per_channel
+        # Possibly update quantize config with model specific defaults
+        use_per_channel_reduce_range = config.model_type not in NO_PER_CHANNEL_REDUCE_RANGE_MODELS
 
-        if conv_args.reduce_range is not None:
-            quantize_config['reduce_range'] = conv_args.reduce_range
+        if quantization_args.per_channel is None:
+            quantization_args.per_channel = use_per_channel_reduce_range
+        if quantization_args.reduce_range is None:
+            quantization_args.reduce_range = use_per_channel_reduce_range
 
-        quantize([
-            os.path.join(output_model_folder, x)
-            for x in os.listdir(output_model_folder)
-            if x.endswith('.onnx') and not x.endswith('_quantized.onnx')
-        ], **quantize_config)
+        quantize(
+            output_model_folder,
+            os.path.join(output_model_folder, 'onnx'),
+            quantization_args,
+        )
+        with open(os.path.join(output_model_folder, 'quantize_config.json'), 'w') as fp:
+            json.dump(asdict(quantization_args), fp, indent=4)
 
     # Step 3. Move .onnx files to the 'onnx' subfolder
-    os.makedirs(os.path.join(output_model_folder, 'onnx'), exist_ok=True)
     for file in os.listdir(output_model_folder):
         if file.endswith(('.onnx', '.onnx_data')):
             shutil.move(os.path.join(output_model_folder, file),
@@ -536,7 +452,8 @@ def main():
         from transformers import GenerationConfig
         from .extra.whisper import get_alignment_heads
 
-        generation_config = GenerationConfig.from_pretrained(model_id, **from_pretrained_kwargs)
+        generation_config = GenerationConfig.from_pretrained(
+            model_id, **from_pretrained_kwargs)
         generation_config.alignment_heads = get_alignment_heads(config)
         generation_config.save_pretrained(output_model_folder)
 
diff --git a/scripts/extra/marian.py b/scripts/extra/marian.py
index e5f370021..ef9bd279d 100644
--- a/scripts/extra/marian.py
+++ b/scripts/extra/marian.py
@@ -1,61 +1,6 @@
 import json
 from transformers.utils import cached_file
 
-# NOTE: In total, there are 1440 models available on the HuggingFace hub (https://huggingface.co/Helsinki-NLP).
-# We have converted some of these (listed below). If you don't see your model here, feel free to convert it yourself
-# and make a pull request to this repo.
-
-SUPPORTED_HELSINKI_NLP_MODELS = [
-    'en-es', 'es-en',            # English <-> Spanish
-    'en-fr', 'fr-en',            # English <-> French
-    'en-hi', 'hi-en',            # English <-> Hindi
-    'en-de', 'de-en',            # English <-> German
-    'en-ru', 'ru-en',            # English <-> Russian
-    'en-it', 'it-en',            # English <-> Italian
-    'en-ar', 'ar-en',            # English <-> Arabic
-    'en-zh', 'zh-en',            # English <-> Chinese
-    'en-sv', 'sv-en',            # English <-> Swedish
-    'en-mul', 'mul-en',          # English <-> Multilingual
-    'en-nl', 'nl-en',            # English <-> Dutch
-    'en-fi', 'fi-en',            # English <-> Finnish
-    'en-jap', 'jap-en',          # English <-> Japanese
-    'en-cs', 'cs-en',            # English <-> Czech
-    'en-vi', 'vi-en',            # English <-> Vietnamese
-    'en-xh', 'xh-en',            # English <-> Xhosa
-    'en-hu', 'hu-en',            # English <-> Hungarian
-    'en-da', 'da-en',            # English <-> Danish
-    'en-id', 'id-en',            # English <-> Indonesia
-    'en-uk', 'uk-en',            # English <-> Ukranian
-    'en-af', 'af-en',            # English <-> Afrikaans
-    'en-ROMANCE', 'ROMANCE-en',  # English <-> ROMANCE
-    'de-es', 'es-de',            # German <-> Spanish
-    'fr-es', 'es-fr',            # French <-> Spanish
-    'fr-de', 'de-fr',            # French <-> German
-    'es-it', 'it-es',            # Spanish <-> Italian
-    'es-ru', 'ru-es',            # Spanish <-> Russian
-    'fr-ru', 'ru-fr',            # French <-> Russian
-    'fr-ro', 'ro-fr',            # French <-> Romanian
-    'uk-ru', 'ru-uk',            # Ukranian <-> Russian
-
-    'it-fr',                     # Italian --> French
-    'en-ro',                     # English --> Romanian
-    'pl-en',                     # Poland --> English
-    'tr-en',                     # Turkey --> English
-    'ko-en',                     # Korean --> English
-    'bat-en',                    # Baltic --> English
-    'et-en',                     # Estonian --> English
-    'fi-de',                     # Finnish --> German
-    'gem-gem',                   # Germanic <-> Germanic
-    'gmw-gmw',                   # West Germanic <-> West Germanic
-    'da-de',                     # Danish <-> German
-    'ja-en',                     # Japanese --> English
-    'nl-fr',                     # Netherlands --> French
-    'no-de',                     # Norwegian --> German
-    'tc-big-tr-en',              # Turkish --> English
-    'th-en',                     # Thai --> English
-    'en-cs',                     # English --> Czech
-]
-
 
 def generate_tokenizer_json(model_path, tokenizer):
     # Marian models use two separate tokenizers for source and target languages.
diff --git a/scripts/extra/openelm.py b/scripts/extra/openelm.py
new file mode 100644
index 000000000..28ce793ee
--- /dev/null
+++ b/scripts/extra/openelm.py
@@ -0,0 +1,64 @@
+import random
+from typing import Optional, Tuple
+
+from optimum.exporters.onnx.config import TextDecoderOnnxConfig
+from optimum.utils import NormalizedTextConfig, DummyInputGenerator, DEFAULT_DUMMY_SHAPES, DummyTextInputGenerator, NormalizedConfig
+
+class OpenElmDummyPastKeyValuesGenerator(DummyInputGenerator):
+
+    SUPPORTED_INPUT_NAMES = ("past_key_values", )
+
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedTextConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"],
+        random_batch_size_range: Optional[Tuple[int, int]] = None,
+        random_sequence_length_range: Optional[Tuple[int, int]] = None,
+        **kwargs,
+    ):
+        self.num_layers = normalized_config.num_layers
+        self.num_kv_heads = normalized_config.num_kv_heads
+        self.num_query_heads = normalized_config.num_query_heads
+        self.head_dim = normalized_config.head_dim
+
+        self.hidden_size = normalized_config.model_dim
+        if random_batch_size_range:
+            low, high = random_batch_size_range
+            self.batch_size = random.randint(low, high)
+        else:
+            self.batch_size = batch_size
+        if random_sequence_length_range:
+            low, high = random_sequence_length_range
+            self.sequence_length = random.randint(low, high)
+        else:
+            self.sequence_length = sequence_length
+
+    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
+        data = []
+        for i in range(self.num_layers):
+            kv_shape = (
+                self.batch_size,
+                self.num_kv_heads[i],
+                self.sequence_length,
+                self.head_dim,
+            )
+            data.append((
+                self.random_float_tensor(kv_shape, framework=framework, dtype=float_dtype),
+                self.random_float_tensor(kv_shape, framework=framework, dtype=float_dtype),
+            ))
+        return data
+
+
+class OpenElmOnnxConfig(TextDecoderOnnxConfig):
+    DEFAULT_ONNX_OPSET = 14
+
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, OpenElmDummyPastKeyValuesGenerator)
+    DUMMY_PKV_GENERATOR_CLASS = OpenElmDummyPastKeyValuesGenerator
+    NORMALIZED_CONFIG_CLASS = NormalizedConfig.with_args(
+        num_kv_heads="num_kv_heads",
+        num_query_heads="num_query_heads",
+        num_layers="num_transformer_layers",
+        allow_new=True,
+    )
diff --git a/scripts/extra/whisper.py b/scripts/extra/whisper.py
index 1a1a70aab..2a9937c96 100644
--- a/scripts/extra/whisper.py
+++ b/scripts/extra/whisper.py
@@ -14,44 +14,30 @@
     'whisper-small': [[5, 3], [5, 9], [8, 0], [8, 4], [8, 7], [8, 8], [9, 0], [9, 7], [9, 9], [10, 5]],
     'whisper-medium.en': [[11, 4], [14, 1], [14, 12], [14, 14], [15, 4], [16, 0], [16, 4], [16, 9], [17, 12], [17, 14], [18, 7], [18, 10], [18, 15], [20, 0], [20, 3], [20, 9], [20, 14], [21, 12]],
     'whisper-medium': [[13, 15], [15, 4], [15, 15], [16, 1], [20, 0], [23, 4]],
+    'whisper-large-v3-turbo': [[2, 4], [2, 11], [3, 3], [3, 6], [3, 11], [3, 14]],
     'whisper-large-v2': [[10, 12], [13, 17], [16, 11], [16, 12], [16, 13], [17, 15], [17, 16], [18, 4], [18, 11], [18, 19], [19, 11], [21, 2], [21, 3], [22, 3], [22, 9], [22, 12], [23, 5], [23, 7], [23, 13], [25, 5], [26, 1], [26, 12], [27, 15]],
     'whisper-large': [[9, 19], [11, 2], [11, 4], [11, 17], [22, 7], [22, 11], [22, 17], [23, 2], [23, 15]],
 }
 
 
 class CustomWhisperOnnxConfig(WhisperOnnxConfig):
+    """
+    Custom ONNX config for Whisper models to output cross attentions.
+    Needed to compute token-level timestamps.
+    """
     @property
     def outputs(self) -> Dict[str, Dict[int, str]]:
         common_outputs = super().outputs
 
-        if self._behavior is ConfigBehavior.ENCODER:
-            for i in range(self._config.encoder_layers):
-                common_outputs[f"encoder_attentions.{i}"] = {0: "batch_size"}
-        elif self._behavior is ConfigBehavior.DECODER:
-            for i in range(self._config.decoder_layers):
-                common_outputs[f"decoder_attentions.{i}"] = {
-                    0: "batch_size",
-                    2: "decoder_sequence_length",
-                    3: "past_decoder_sequence_length + 1"
-                }
+        if self._behavior is ConfigBehavior.DECODER:
             for i in range(self._config.decoder_layers):
                 common_outputs[f"cross_attentions.{i}"] = {
                     0: "batch_size",
                     2: "decoder_sequence_length",
                     3: "encoder_sequence_length_out"
                 }
-
         return common_outputs
 
-    @property
-    def torch_to_onnx_output_map(self):
-        if self._behavior is ConfigBehavior.ENCODER:
-            # The encoder export uses WhisperEncoder that returns the key "attentions"
-            return {"attentions": "encoder_attentions"}
-        else:
-            return {}
-
-
 def get_main_export_kwargs(config, task):
 
     # See https://github.com/huggingface/optimum/blob/a39b1f5637af9725c0c788b86ca1fdf71ad3dcc2/docs/source/exporters/onnx/usage_guides/export_a_model.mdx#L264
@@ -59,9 +45,8 @@ def get_main_export_kwargs(config, task):
 
     custom_onnx_configs = dict(
         encoder_model=custom_config.with_behavior("encoder"),
-        decoder_model=custom_config.with_behavior("decoder", use_past=False),
-        decoder_with_past_model=custom_config.with_behavior(
-            "decoder", use_past=True),
+        decoder_model=custom_config.with_behavior("decoder", use_past=True, use_past_in_inputs=False),
+        decoder_with_past_model=custom_config.with_behavior("decoder", use_past=True, use_past_in_inputs=True),
     )
 
     return dict(
diff --git a/scripts/quantize.py b/scripts/quantize.py
new file mode 100644
index 000000000..1ace2d353
--- /dev/null
+++ b/scripts/quantize.py
@@ -0,0 +1,345 @@
+from enum import Enum
+
+from tqdm import tqdm
+from typing import Set
+import onnx
+import os
+
+from dataclasses import dataclass, field
+
+from transformers import HfArgumentParser
+from optimum.onnx.graph_transformations import check_and_save_model
+
+from onnxruntime.quantization import QuantType, QuantizationMode
+from onnxruntime.quantization.onnx_quantizer import ONNXQuantizer
+from onnxruntime.quantization.registry import IntegerOpsRegistry
+from onnxruntime.quantization.matmul_4bits_quantizer import MatMul4BitsQuantizer
+from onnxruntime.quantization.matmul_bnb4_quantizer import MatMulBnb4Quantizer
+from onnxconverter_common import float16
+import onnx_graphsurgeon as gs
+
+
+class QuantMode(Enum):
+    # F32 = 'fp32'
+    FP16 = "fp16"
+    Q8 = "q8"
+    QI8 = "int8"
+    QU8 = "uint8"
+    Q4 = "q4"
+    Q4F16 = "q4f16"
+    BNB4 = "bnb4"
+
+
+QUANTIZE_SUFFIX_MAPPING = {
+    QuantMode.Q8: "quantized",
+}
+
+QUANTIZE_OPTIONS = tuple(x.value for x in QuantMode)
+
+
+@dataclass
+class IOArguments:
+    """
+    Arguments to specify input and output folders
+    """
+    input_folder: str = field(
+        metadata={
+            "help": "Path of the input folder containing the .onnx models to quantize"
+        }
+    )
+    output_folder: str = field(
+        metadata={
+            "help": "Path of the output folder where the quantized .onnx models will be saved"
+        }
+    )
+
+@dataclass
+class QuantizationArguments:
+    """
+    Arguments for quantizing ONNX models
+    """
+
+    modes: QuantMode = field(
+        default=QUANTIZE_OPTIONS,
+        metadata={
+            "help": "Quantization mode to use.",
+            "choices": QUANTIZE_OPTIONS,
+            "nargs": "+",
+        },
+    )
+
+    # 8-bit quantization
+    per_channel: bool = field(
+        default=None, metadata={"help": "Whether to quantize weights per channel"}
+    )
+    reduce_range: bool = field(
+        default=None,
+        metadata={
+            "help": "Whether to quantize weights with 7-bits. It may improve the accuracy for some models running on non-VNNI machine, especially for per-channel mode"
+        },
+    )
+
+    # 4-bit quantization
+    block_size: int = field(
+        default=None,
+        metadata={
+            "help": "Block size for blockwise quantization. Note: bnb.nn.Linear4bit only uses block_size=64"
+        },
+    )
+
+    # MatMul4BitsQuantizer
+    is_symmetric: bool = field(
+        default=True,
+        metadata={"help": "Indicate whether to quantize the model symmetrically"},
+    )
+    accuracy_level: int = field(
+        default=None,
+        metadata={
+            "help": "Accuracy level of the 4-bit quantized MatMul computation. "
+            "Refer to the MatMulNBits contrib op's 'accuracy_level' attribute for details "
+            "(https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#commicrosoftmatmulnbits)."
+        },
+    )
+
+    # MatMulBnb4Quantizer
+    quant_type: int = field(
+        default=MatMulBnb4Quantizer.NF4,
+        metadata={
+            "help": "Quantization data type. 0: FP4, 1: NF4",
+            "choices": [MatMulBnb4Quantizer.FP4, MatMulBnb4Quantizer.NF4],
+        },
+    )
+
+
+def get_operators(model: onnx.ModelProto) -> Set[str]:
+    operators = set()
+
+    def traverse_graph(graph):
+        for node in graph.node:
+            operators.add(node.op_type)
+            for attr in node.attribute:
+                if attr.type == onnx.AttributeProto.GRAPH:
+                    traverse_graph(attr.g)
+
+    traverse_graph(model.graph)
+    return operators
+
+
+def quantize_q8(
+    model: onnx.ModelProto,
+    save_path: str,
+    per_channel: bool,
+    reduce_range: bool,
+    weight_type: QuantType,
+):
+    """
+    Quantize the weights of the model from float32 to int8/uint8
+
+    Uses unsigned ints for activation values, signed ints for weights, per
+    https://onnxruntime.ai/docs/performance/quantization.html#data-type-selection
+    it is faster on most CPU architectures
+    """
+
+    quantizer = ONNXQuantizer(
+        model,
+        per_channel,
+        reduce_range,
+        mode=QuantizationMode.IntegerOps,
+        static=False,
+        weight_qType=weight_type,
+        activation_qType=QuantType.QUInt8,  # dynamic activation only supports uint8
+        tensors_range=None,
+        nodes_to_quantize=[],
+        nodes_to_exclude=[],
+        op_types_to_quantize=list(IntegerOpsRegistry.keys()),
+        extra_options=dict(
+            EnableSubgraph=True,
+            MatMulConstBOnly=True,
+        ),
+    )
+
+    quantizer.quantize_model()
+    check_and_save_model(quantizer.model.model, save_path)
+
+
+def quantize_fp16(
+    model: onnx.ModelProto,
+    save_path: str,
+):
+    """
+    Quantize the weights of the model from float32 to float16
+    """
+
+    # Check whether we should disable shape infer:
+    # ValueError: Message onnx.ModelProto exceeds maximum protobuf size of 2GB: 2338583841
+    disable_shape_infer = model.ByteSize() >= onnx.checker.MAXIMUM_PROTOBUF
+
+    model_fp16 = float16.convert_float_to_float16(
+        model,
+        keep_io_types=True,
+        disable_shape_infer=disable_shape_infer,
+    )
+    graph = gs.import_onnx(model_fp16)
+    graph.toposort()
+    model_fp16 = gs.export_onnx(graph)
+    check_and_save_model(model_fp16, save_path)
+
+
+def quantize_q4(
+    model: onnx.ModelProto,
+    save_path: str | None,
+    block_size: int,
+    is_symmetric: bool,
+    accuracy_level: int,
+):
+    """
+    Quantize the weights of the model from float32 to 4-bit int
+    """
+
+    quantizer = MatMul4BitsQuantizer(
+        model=model,
+        block_size=block_size,
+        is_symmetric=is_symmetric,
+        accuracy_level=accuracy_level,
+    )
+    quantizer.process()
+    if save_path:
+        check_and_save_model(quantizer.model.model, save_path)
+    return quantizer.model.model
+
+
+def quantize_bnb4(
+    model: onnx.ModelProto,
+    save_path: str,
+    block_size: int,
+    quant_type: int,
+):
+    """
+    Quantize the weights of the model from float32 to 4-bit int using MatMulBnb4Quantizer
+    """
+
+    quantizer = MatMulBnb4Quantizer(
+        model=model,
+        block_size=block_size,
+        quant_type=quant_type,
+    )
+    quantizer.process()
+    check_and_save_model(quantizer.model.model, save_path)
+    return quantizer.model.model
+
+
+def quantize(input_folder, output_folder, quantization_args: QuantizationArguments):
+
+    # (Step 1) Validate the arguments
+    if not quantization_args.modes:
+        raise ValueError("At least one quantization mode must be specified")
+
+    if not os.path.exists(input_folder):
+        raise ValueError(f"Input folder {input_folder} does not exist")
+
+    model_names_or_paths = [
+        os.path.join(input_folder, file)
+        for file in os.listdir(input_folder)
+        if file.endswith(".onnx")
+    ]
+    if not model_names_or_paths:
+        raise ValueError(f"No .onnx models found in {input_folder}")
+
+    os.makedirs(output_folder, exist_ok=True)
+
+    # (Step 2) Quantize the models
+    for model_path in (progress_models := tqdm(model_names_or_paths)):
+        progress_models.set_description(f"Processing {model_path}")
+
+        file_name_without_extension = os.path.splitext(os.path.basename(model_path))[0]
+
+        for mode in (progress := tqdm(quantization_args.modes)):
+            progress.set_description(f" - Quantizing to {mode}")
+            mode = QuantMode(mode)
+            suffix = QUANTIZE_SUFFIX_MAPPING.get(mode, mode.value)
+            save_path = os.path.join(
+                output_folder,
+                f"{file_name_without_extension}_{suffix}.onnx",
+            )
+
+            # NOTE: Unfortunately, we need to reload the model for each quantization mode,
+            # which is memory inefficient. This is because the quantization functions
+            # modify the model in-place, and we need to keep the original model for each mode.
+            model = onnx.load_model(model_path)
+
+            if mode == QuantMode.FP16:
+                quantize_fp16(
+                    model,
+                    save_path,
+                )
+
+            elif mode in (QuantMode.Q4, QuantMode.Q4F16):
+                block_size = quantization_args.block_size or 32
+
+                q4_model = quantize_q4(
+                    model,
+                    save_path=None if mode == QuantMode.Q4F16 else save_path,
+                    block_size=block_size,
+                    is_symmetric=quantization_args.is_symmetric,
+                    accuracy_level=quantization_args.accuracy_level,
+                )
+                if mode == QuantMode.Q4F16:
+                    quantize_fp16(
+                        q4_model,
+                        save_path,
+                    )
+
+            elif mode == QuantMode.BNB4:
+                quantize_bnb4(
+                    model,
+                    save_path,
+                    block_size=quantization_args.block_size or 64,
+                    quant_type=(
+                        quantization_args.quant_type
+                        if quantization_args.quant_type is not None
+                        else MatMulBnb4Quantizer.NF4
+                    ),
+                )
+
+            elif mode in (QuantMode.Q8, QuantMode.QI8, QuantMode.QU8):
+                if mode == QuantMode.Q8:
+                    # NOTE:
+                    # As of 2024/06/28, the current latest version of onnxruntime-web is 1.18.0, and does not support INT8 weights for Conv layers.
+                    # If you attempt to run a model with INT8 weights for Conv layers, you will get an error like:
+                    # `Can't create a session. ERROR_CODE: 9, ERROR_MESSAGE: Could not find an implementation for ConvInteger(10) node with name '/.../Conv_quant'`
+                    #
+                    # For this reason, we choose model weight types to ensure compatibility with onnxruntime-web.
+                    #
+                    # As per docs, signed weight type (QInt8) is faster on most CPUs, so, we use that unless the model contains a Conv layer.
+                    # For more information, see:
+                    #  - https://github.com/microsoft/onnxruntime/issues/3130#issuecomment-1105200621
+                    #  - https://github.com/microsoft/onnxruntime/issues/2339
+                    op_types = get_operators(model)
+                    weight_type = (
+                        QuantType.QUInt8 if "Conv" in op_types else QuantType.QInt8
+                    )
+
+                elif mode == QuantMode.QI8:
+                    weight_type = QuantType.QInt8
+
+                else:  # mode == QuantMode.QU8:
+                    weight_type = QuantType.QUInt8
+
+                quantize_q8(
+                    model,
+                    save_path,
+                    per_channel=quantization_args.per_channel,
+                    reduce_range=quantization_args.reduce_range,
+                    weight_type=weight_type,
+                )
+
+
+def main():
+    parser = HfArgumentParser((IOArguments, QuantizationArguments))
+    io_args, quantization_args = parser.parse_args_into_dataclasses()
+    input_folder = io_args.input_folder
+    output_folder = io_args.output_folder
+    quantize(input_folder, output_folder, quantization_args)
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index f0b3867ae..9773d04e7 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -1,5 +1,9 @@
-transformers[torch]==4.33.2
-onnxruntime<1.16.0
-optimum==1.13.2
-tqdm
-onnx==1.13.1
+transformers[torch]==4.43.4
+onnxruntime==1.19.2
+optimum==1.21.3
+onnx==1.16.2
+onnxconverter-common==1.14.0
+tqdm==4.66.5
+onnxslim==0.1.31
+--extra-index-url https://pypi.ngc.nvidia.com
+onnx_graphsurgeon==0.3.27
diff --git a/scripts/supported_models.py b/scripts/supported_models.py
deleted file mode 100644
index dc044167e..000000000
--- a/scripts/supported_models.py
+++ /dev/null
@@ -1,1206 +0,0 @@
-from .extra.marian import SUPPORTED_HELSINKI_NLP_MODELS
-
-
-SUPPORTED_MODELS = {
-    # NOTE: keys of `SUPPORTED_MODELS` are subsets of https://github.com/huggingface/optimum/blob/7f8e606689365931300ef5e6d3b20cb88771cb08/optimum/exporters/tasks.py#L281-L965
-    'albert': {
-        # Masked language modelling
-        'fill-mask': [
-            'albert-base-v2',
-            'albert-large-v2',
-        ],
-
-        # Feature extraction
-        'feature-extraction': [
-            'sentence-transformers/paraphrase-albert-small-v2',
-            'sentence-transformers/paraphrase-albert-base-v2',
-        ],
-    },
-    'audio-spectrogram-transformer': {
-        # Audio classification
-        'audio-classification': {
-            'MIT/ast-finetuned-audioset-10-10-0.4593',
-            'MIT/ast-finetuned-audioset-16-16-0.442',
-            'MIT/ast-finetuned-speech-commands-v2',
-            'mtg-upf/discogs-maest-30s-pw-73e-ts',
-        }
-    },
-    'bart': {
-        # Summarization
-        'summarization': [
-            'sshleifer/distilbart-xsum-12-1',
-            'sshleifer/distilbart-xsum-6-6',
-            'sshleifer/distilbart-xsum-12-3',
-            'sshleifer/distilbart-xsum-9-6',
-            'sshleifer/distilbart-xsum-12-6',
-            'sshleifer/distilbart-cnn-12-3',
-            'sshleifer/distilbart-cnn-12-6',
-            'sshleifer/distilbart-cnn-6-6',
-            'facebook/bart-large-cnn',
-            'facebook/bart-large-xsum',
-        ],
-        # Zero-shot classification
-        'zero-shot-classification': {
-            'facebook/bart-large-mnli',
-        },
-    },
-    'beit': {
-        # Image classification
-        'image-classification': [
-            'microsoft/beit-base-patch16-224',
-            'microsoft/beit-base-patch16-224-pt22k',
-            'microsoft/beit-base-patch16-384',
-            'microsoft/beit-base-patch16-224-pt22k-ft22k',
-            'microsoft/beit-large-patch16-224',
-            'microsoft/beit-large-patch16-224-pt22k',
-            'microsoft/beit-large-patch16-512',
-            'microsoft/beit-large-patch16-224-pt22k-ft22k',
-            'microsoft/beit-large-patch16-384',
-            'microsoft/dit-base-finetuned-rvlcdip',
-            'microsoft/dit-large-finetuned-rvlcdip',
-        ],
-    },
-    'bert': {
-        # Feature extraction
-        'feature-extraction': [
-            'sentence-transformers/all-MiniLM-L6-v2',
-            'sentence-transformers/all-MiniLM-L12-v2',
-            'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2',
-            'sentence-transformers/paraphrase-MiniLM-L6-v2',
-            'sentence-transformers/paraphrase-MiniLM-L3-v2',
-            'sentence-transformers/bert-base-nli-mean-tokens',
-            'sentence-transformers/multi-qa-MiniLM-L6-cos-v1',
-            'sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens',
-            'sentence-transformers/LaBSE',
-            'deepset/sentence_bert',
-            'intfloat/e5-small',
-            'intfloat/e5-small-v2',
-            'intfloat/e5-base',
-            'intfloat/e5-base-v2',
-            'intfloat/e5-large',
-            'intfloat/e5-large-v2',
-            'intfloat/multilingual-e5-base',
-            'thenlper/gte-small',
-            'thenlper/gte-base',
-            'thenlper/gte-large',
-            'BAAI/bge-small-en',
-            'BAAI/bge-base-en',
-            'BAAI/bge-large-en',
-            'BAAI/bge-large-en-v1.5',
-            'BAAI/bge-base-en-v1.5',
-            'BAAI/bge-small-en-v1.5',
-            'BAAI/bge-large-zh-v1.5',
-            'BAAI/bge-base-zh-v1.5',
-            'BAAI/bge-small-zh-v1.5',
-            'allenai/scibert_scivocab_uncased',
-            'SpanBERT/spanbert-large-cased',
-            'SpanBERT/spanbert-base-cased',
-            'cambridgeltl/SapBERT-from-PubMedBERT-fulltext',
-            'indobenchmark/indobert-base-p1',
-            'GanjinZero/UMLSBert_ENG',
-            'DeepPavlov/rubert-base-cased',
-            'monologg/kobert',
-        ],
-
-        # Text classification
-        'text-classification': [
-            'nlptown/bert-base-multilingual-uncased-sentiment',
-            'ProsusAI/finbert',
-            'unitary/toxic-bert',
-            'BAAI/bge-reranker-large',
-            'BAAI/bge-reranker-base',
-            'cross-encoder/ms-marco-TinyBERT-L-2-v2',
-            'cross-encoder/ms-marco-MiniLM-L-2-v2',
-            'cross-encoder/ms-marco-MiniLM-L-4-v2',
-            'cross-encoder/ms-marco-MiniLM-L-6-v2',
-            'cross-encoder/ms-marco-MiniLM-L-12-v2',
-        ],
-
-        # Token classification
-        'token-classification': [
-            'Davlan/bert-base-multilingual-cased-ner-hrl',
-            'ckiplab/bert-base-chinese-ner',
-            'ckiplab/bert-base-chinese-ws',
-            'ckiplab/bert-base-chinese-pos',
-            'dslim/bert-base-NER',
-            'dslim/bert-base-NER-uncased',
-        ],
-
-        # Masked language modelling
-        'fill-mask': [
-            'bert-base-uncased',
-            'bert-base-cased',
-            'bert-base-multilingual-uncased',
-            'bert-base-multilingual-cased',
-            'bert-base-chinese',
-            'emilyalsentzer/Bio_ClinicalBERT',
-        ],
-    },
-    'blenderbot': {
-        # Text-to-text (TODO add conversational)
-        'text2text-generation': [
-            'facebook/blenderbot-400M-distill',
-            # 'facebook/blenderbot-1B-distill',
-        ],
-    },
-    'blenderbot-small': {
-        # Text-to-text (TODO add conversational)
-        'text2text-generation': [
-            # 'facebook/blenderbot-90M',  # DEPRECATED
-            'facebook/blenderbot_small-90M',
-        ],
-    },
-    'bloom': {
-        # Text generation
-        'text-generation': [
-            'bigscience/bloom-560m',
-            'bigscience/bloomz-560m',
-        ],
-    },
-
-    'camembert': {
-        # Feature extraction
-        'feature-extraction': [
-            'dangvantuan/sentence-camembert-large',
-        ],
-
-        # Token classification
-        'token-classification': [
-            'Jean-Baptiste/camembert-ner',
-            'Jean-Baptiste/camembert-ner-with-dates',
-            'pythainlp/thainer-corpus-v2-base-model',
-            'gilf/french-camembert-postag-model',
-        ],
-
-        # Masked language modelling
-        'fill-mask': [
-            'camembert-base',
-            'airesearch/wangchanberta-base-att-spm-uncased',
-        ],
-    },
-    'clap': {
-        # Zero-shot audio classification and feature extraction
-        # (with and without `--split_modalities`)
-        'zero-shot-audio-classification': {
-            'laion/clap-htsat-unfused',
-            # TODO add 'laion/clap-htsat-fused',
-            'laion/larger_clap_general',
-            'laion/larger_clap_music_and_speech',
-            # 'Xenova/tiny-random-ClapModel',
-        }
-    },
-    'chinese_clip': {
-        # Zero-shot image classification
-        # TODO: Add `--split_modalities` option
-        'zero-shot-image-classification': [
-            'OFA-Sys/chinese-clip-vit-base-patch16',
-            'OFA-Sys/chinese-clip-vit-large-patch14',
-            'OFA-Sys/chinese-clip-vit-large-patch14-336px',
-            # 'OFA-Sys/chinese-clip-vit-huge-patch14', # TODO add
-        ],
-    },
-    'clip': {
-        # Zero-shot image classification (and feature extraction)
-        # (with and without `--split_modalities`)
-        'zero-shot-image-classification': [
-            'openai/clip-vit-base-patch16',
-            'openai/clip-vit-base-patch32',
-            'openai/clip-vit-large-patch14',
-            'openai/clip-vit-large-patch14-336',
-        ],
-    },
-    'clipseg': {
-        # Image segmentation
-        'image-segmentation': [
-            'CIDAS/clipseg-rd64-refined',
-            'CIDAS/clipseg-rd64',
-            'CIDAS/clipseg-rd16',
-        ],
-    },
-    'codegen': {
-        # Text generation
-        'text-generation': [
-            'Salesforce/codegen-350M-mono',
-            'Salesforce/codegen-350M-multi',
-            'Salesforce/codegen-350M-nl',
-        ],
-    },
-    'convbert': {
-        # Feature extraction
-        'feature-extraction': [
-            'YituTech/conv-bert-small',
-            'YituTech/conv-bert-medium-small',
-            'YituTech/conv-bert-base',
-        ],
-    },
-    'convnext': {
-        # Image classification
-        'image-classification': [
-            'facebook/convnext-tiny-224',
-            'facebook/convnext-small-224',
-            'facebook/convnext-base-224',
-            'facebook/convnext-base-224-22k',
-            'facebook/convnext-base-224-22k-1k',
-            'facebook/convnext-base-384',
-            'facebook/convnext-base-384-22k-1k',
-            'facebook/convnext-large-224',
-            'facebook/convnext-large-224-22k',
-            'facebook/convnext-large-224-22k-1k',
-            'facebook/convnext-large-384',
-            'facebook/convnext-large-384-22k-1k',
-            'facebook/convnext-xlarge-224-22k',
-            'facebook/convnext-xlarge-224-22k-1k',
-            'facebook/convnext-xlarge-384-22k-1k',
-        ],
-    },
-    'convnextv2': {
-        # Image classification
-        'image-classification': [
-            'facebook/convnextv2-atto-1k-224',
-            'facebook/convnextv2-femto-1k-224',
-            'facebook/convnextv2-pico-1k-224',
-            'facebook/convnextv2-tiny-1k-224',
-            'facebook/convnextv2-tiny-22k-384',
-            'facebook/convnextv2-tiny-22k-224',
-            'facebook/convnextv2-nano-1k-224',
-            'facebook/convnextv2-nano-22k-384',
-            'facebook/convnextv2-base-22k-224',
-            'facebook/convnextv2-base-1k-224',
-            'facebook/convnextv2-base-22k-384',
-            'facebook/convnextv2-large-22k-224',
-            'facebook/convnextv2-large-1k-224',
-            'facebook/convnextv2-large-22k-384',
-            # 'facebook/convnextv2-huge-22k-512',
-            # 'facebook/convnextv2-huge-1k-224',
-            # 'facebook/convnextv2-huge-22k-384',
-            # 'facebook/convnextv2-nano-22k-224',
-        ],
-    },
-    'deberta': {
-        # Zero-shot classification
-        'zero-shot-classification': [
-            'cross-encoder/nli-deberta-base',
-            'Narsil/deberta-large-mnli-zero-cls',
-        ],
-    },
-    'deberta-v2': {
-        # Zero-shot classification
-        'zero-shot-classification': [
-            'cross-encoder/nli-deberta-v3-xsmall',
-            'cross-encoder/nli-deberta-v3-small',
-            'cross-encoder/nli-deberta-v3-base',
-            'cross-encoder/nli-deberta-v3-large',
-            'MoritzLaurer/DeBERTa-v3-xsmall-mnli-fever-anli-ling-binary',
-            'MoritzLaurer/DeBERTa-v3-base-mnli',
-            'MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli',
-            'MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli',
-            'MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7',
-            'sileod/deberta-v3-base-tasksource-nli',
-            'sileod/deberta-v3-large-tasksource-nli',
-        ],
-    },
-    # TODO: Add back in v3
-    # 'decision-transformer': {
-    #     # Reinforcement learning
-    #     'reinforcement-learning': [
-    #         'edbeeching/decision-transformer-gym-hopper-expert',
-    #         'edbeeching/decision-transformer-gym-hopper-medium',
-    #         'edbeeching/decision-transformer-gym-hopper-medium-replay',
-    #         'edbeeching/decision-transformer-gym-hopper-expert-new',
-    #         'edbeeching/decision-transformer-gym-halfcheetah-expert',
-    #         'edbeeching/decision-transformer-gym-halfcheetah-medium',
-    #         'edbeeching/decision-transformer-gym-halfcheetah-medium-replay',
-    #         'edbeeching/decision-transformer-gym-walker2d-expert',
-    #         'edbeeching/decision-transformer-gym-walker2d-medium',
-    #         'edbeeching/decision-transformer-gym-walker2d-medium-replay',
-    #     ],
-    # },
-    'deit': {
-        # Image classification
-        'image-classification': [
-            'facebook/deit-tiny-distilled-patch16-224',
-            'facebook/deit-small-distilled-patch16-224',
-            'facebook/deit-base-distilled-patch16-224',
-            'facebook/deit-base-distilled-patch16-384',
-        ],
-    },
-    'detr': {
-        # Object detection
-        'object-detection': [
-            'facebook/detr-resnet-50',
-            'facebook/detr-resnet-101',
-        ],
-
-        # Image segmentation
-        'image-segmentation': [
-            'facebook/detr-resnet-50-panoptic',
-        ],
-    },
-    'dinov2': {
-        # Feature extraction
-        'feature-extraction': [
-            'facebook/dinov2-small',
-            'facebook/dinov2-base',
-            'facebook/dinov2-large',
-            # 'facebook/dinov2-giant',  # TODO add
-        ],
-
-        # Image classification
-        'image-classification': [
-            'facebook/dinov2-small-imagenet1k-1-layer',
-            'facebook/dinov2-base-imagenet1k-1-layer',
-            'facebook/dinov2-large-imagenet1k-1-layer',
-            # 'facebook/dinov2-giant-imagenet1k-1-layer',  # TODO add
-        ],
-    },
-    'distilbert': {
-        # Feature extraction
-        'feature-extraction': [
-            'sentence-transformers/multi-qa-distilbert-cos-v1',
-            'sentence-transformers/distiluse-base-multilingual-cased-v1',
-            'sentence-transformers/distiluse-base-multilingual-cased-v2',
-            'sentence-transformers/distilbert-base-nli-mean-tokens',
-            'sentence-transformers/distilbert-base-nli-stsb-mean-tokens',
-            'sentence-transformers/msmarco-distilbert-base-v4',
-        ],
-
-        # Text classification
-        'text-classification': [
-            'distilbert-base-uncased-finetuned-sst-2-english',
-        ],
-
-        # Question answering
-        'question-answering': [
-            'distilbert-base-uncased-distilled-squad',
-            'distilbert-base-cased-distilled-squad',
-        ],
-
-        # Zero-shot classification
-        'zero-shot-classification': [
-            'typeform/distilbert-base-uncased-mnli',
-        ],
-
-        # Token classification
-        'token-classification': [
-            'Davlan/distilbert-base-multilingual-cased-ner-hrl',
-        ],
-
-        # Masked language modelling
-        'fill-mask': [
-            'distilbert-base-uncased',
-            'distilbert-base-cased',
-        ],
-    },
-    'dit': {  # NOTE: DiT has the same architecture as BEiT.
-        # Feature extraction
-        # NOTE: requires --task feature-extraction
-        'feature-extraction': [
-            'microsoft/dit-base',
-            'microsoft/dit-large',
-        ],
-
-        # Image classification
-        'image-classification': [
-            'microsoft/dit-base-finetuned-rvlcdip',
-            'microsoft/dit-large-finetuned-rvlcdip',
-        ],
-    },
-    'donut': {  # NOTE: also a `vision-encoder-decoder`
-        # Image-to-text
-        'image-to-text': [
-            'naver-clova-ix/donut-base-finetuned-cord-v2',
-            'naver-clova-ix/donut-base-finetuned-zhtrainticket',
-        ],
-
-        # Document Question Answering
-        'document-question-answering': [
-            'naver-clova-ix/donut-base-finetuned-docvqa',
-        ],
-    },
-    'dpt': {
-        # Depth estimation
-        'depth-estimation': [
-            'Intel/dpt-hybrid-midas',
-            'Intel/dpt-large',
-        ],
-    },
-    'depth_anything': {
-        # Depth estimation
-        # NOTE: requires --task depth-estimation
-        'depth-estimation': [
-            'LiheYoung/depth-anything-small-hf',
-            'LiheYoung/depth-anything-base-hf',
-            'LiheYoung/depth-anything-large-hf',
-        ],
-    },
-    'electra': {
-        # Feature extraction
-        'feature-extraction': [
-            # NOTE: requires --task feature-extraction
-            'google/electra-small-discriminator',
-            'google/electra-base-discriminator',
-        ],
-    },
-    'esm': {
-        # Masked language modelling
-        'fill-mask': [
-            # with and without --task feature-extraction
-            'InstaDeepAI/nucleotide-transformer-500m-human-ref',
-            'InstaDeepAI/nucleotide-transformer-500m-1000g',
-
-            # NOTE: requires --opset 12
-            'facebook/esm2_t6_8M_UR50D',
-            'facebook/esm2_t12_35M_UR50D',
-            'facebook/esm2_t30_150M_UR50D',
-            'facebook/esm2_t33_650M_UR50D',
-        ],
-
-        # Token classification
-        'token-classification': [
-            'AmelieSchreiber/esm2_t6_8M_UR50D_rna_binding_site_predictor',
-        ],
-
-        # Zero-shot classification
-        'zero-shot-classification': [
-            'AmelieSchreiber/esm2_t6_8M_UR50D_sequence_classifier_v1',
-        ],
-    },
-    'falcon': {
-        # Text generation
-        'text-generation': [
-            'Rocketknight1/tiny-random-falcon-7b',
-            'fxmarty/really-tiny-falcon-testing',
-        ],
-    },
-    'fastvit': {
-        # Image classification
-        'image-classification': [
-            # NOTE: Supported by timm, but not by transformers
-            # 'timm/fastvit_t8.apple_in1k',
-            # 'timm/fastvit_t8.apple_dist_in1k',
-            # 'timm/fastvit_t12.apple_in1k',
-            # 'timm/fastvit_t12.apple_dist_in1k',
-            # 'timm/fastvit_s12.apple_in1k',
-            # 'timm/fastvit_s12.apple_dist_in1k',
-            # 'timm/fastvit_sa12.apple_in1k',
-            # 'timm/fastvit_sa12.apple_dist_in1k',
-            # 'timm/fastvit_sa24.apple_in1k',
-            # 'timm/fastvit_sa24.apple_dist_in1k',
-            # 'timm/fastvit_sa36.apple_in1k',
-            # 'timm/fastvit_sa36.apple_dist_in1k',
-            # 'timm/fastvit_ma36.apple_in1k',
-            # 'timm/fastvit_ma36.apple_dist_in1k',
-        ],
-    },
-    'glpn': {
-        # Depth estimation
-        'depth-estimation': [
-            'vinvino02/glpn-kitti',
-            'vinvino02/glpn-nyu',
-        ],
-    },
-    'gpt_neo': {
-        # Text generation
-        'text-generation': [
-            'EleutherAI/gpt-neo-125M',
-            'MBZUAI/LaMini-Neo-125M',
-            # 'MBZUAI/LaMini-Neo-1.3B', # TODO add
-            'iliemihai/gpt-neo-romanian-125m',
-        ],
-    },
-    'gpt_neox': {
-        # Text generation
-        'text-generation': [
-            'EleutherAI/pythia-14m',
-            'EleutherAI/pythia-31m',
-            'EleutherAI/pythia-70m',
-            'EleutherAI/pythia-70m-deduped',
-            'EleutherAI/pythia-160m',
-            'EleutherAI/pythia-160m-deduped',
-            'EleutherAI/pythia-410m',
-            'EleutherAI/pythia-410m-deduped',
-        ],
-    },
-    'gpt2': {
-        # Text generation
-        'text-generation': [
-            'gpt2',
-            'distilgpt2',
-            'MBZUAI/LaMini-Cerebras-111M',
-            'MBZUAI/LaMini-Cerebras-256M',
-            'MBZUAI/LaMini-Cerebras-590M',
-            # 'MBZUAI/LaMini-Cerebras-1.3B', # TODO add
-            'MBZUAI/LaMini-GPT-124M',
-            'MBZUAI/LaMini-GPT-774M',
-            # 'MBZUAI/LaMini-GPT-1.5B', # TODO add
-            'aisquared/dlite-v2-774m',
-            'Locutusque/gpt2-large-conversational',
-        ],
-    },
-    'gpt_bigcode': {
-        # Text generation
-        'text-generation': [
-            'bigcode/tiny_starcoder_py',
-            'abacaj/starcoderbase-1b-sft',
-            # 'bigcode/starcoderbase-1b', # NOTE: This model is gated, so we ignore it when testing
-        ],
-    },
-    'gptj': {
-        # Text generation
-        'text-generation': [
-            'TabbyML/J-350M',
-            'Milos/slovak-gpt-j-405M',
-            'heegyu/kogpt-j-350m',
-        ],
-    },
-    'herbert': {
-        # Feature extraction
-        'feature-extraction': [
-            'allegro/herbert-base-cased',
-            'allegro/herbert-large-cased',
-        ],
-    },
-    'hubert': {
-        # Feature extraction
-        'feature-extraction': [
-            'facebook/hubert-base-ls960',
-        ],
-
-        # Audio classification
-        'audio-classification': [
-            'superb/hubert-base-superb-ks',
-        ],
-
-        # Automatic speech recognition
-        'automatic-speech-recognition': [
-            'facebook/hubert-large-ls960-ft',
-        ],
-    },
-    'llama': {
-        # Text generation
-        'text-generation': [
-            'Xenova/llama2.c-stories15M',
-            'Xenova/llama2.c-stories42M',
-            'Xenova/llama2.c-stories110M',
-            'RajuKandasamy/tamillama_tiny_30m',
-            'JackFram/llama-68m',
-            'JackFram/llama-160m',
-        ],
-    },
-    'longt5': {
-        # Text-to-text
-        'text2text-generation': [
-            'google/long-t5-local-base',
-            'google/long-t5-tglobal-base',
-            # 'google/long-t5-tglobal-xl', # too large
-            # 'google/long-t5-tglobal-large', # too large
-            # 'google/long-t5-local-large', # too large
-        ],
-
-        # Summarization
-        'summarization': [
-            'pszemraj/long-t5-tglobal-base-16384-book-summary',
-        ],
-
-        # Feature extraction
-        'feature-extraction': [
-            # NOTE: requires --task feature-extraction
-            'voidful/long-t5-encodec-tglobal-base',
-        ],
-    },
-    'm2m_100': {
-        # Translation
-        'translation': [
-            'facebook/nllb-200-distilled-600M',
-            'facebook/m2m100_418M',
-        ],
-    },
-    'marian': {
-        # Translation
-        'translation': [
-            f'Helsinki-NLP/opus-mt-{x}'
-            for x in SUPPORTED_HELSINKI_NLP_MODELS
-        ],
-    },
-    'mbart': {
-        # Translation
-        'translation': [
-            'facebook/mbart-large-50-many-to-many-mmt',
-            'facebook/mbart-large-50-many-to-one-mmt',
-            'facebook/mbart-large-50',
-        ],
-    },
-    'mistral': {
-        # Text generation
-        'text-generation': [
-            'echarlaix/tiny-random-mistral',
-        ],
-    },
-    'mobilebert': {
-        # Zero-shot classification
-        'zero-shot-classification': [
-            'typeform/mobilebert-uncased-mnli',
-
-            # TODO:
-            # https://github.com/huggingface/optimum/issues/1027
-            # 'google/mobilebert-uncased',
-        ],
-    },
-    'mobilevit': {
-        # Image classification
-        'image-classification': [
-            'apple/mobilevit-small',
-            'apple/mobilevit-x-small',
-            'apple/mobilevit-xx-small',
-        ],
-
-        # TODO: Image segmentation
-        # 'image-segmentation': [
-        #     'apple/deeplabv3-mobilevit-small',
-        #     'apple/deeplabv3-mobilevit-x-small',
-        #     'apple/deeplabv3-mobilevit-xx-small',
-        # ],
-    },
-    'mobilevitv2': {
-        # Image classification
-        'image-classification': [
-            'apple/mobilevitv2-1.0-imagenet1k-256',
-        ],
-
-        # TODO: Image segmentation
-        # 'image-segmentation': [
-        #     'apple/mobilevitv2-1.0-voc-deeplabv3',
-        # ],
-    },
-    'mpt': {
-        # Text generation
-        'text-generation': [
-            'efederici/ipt-350m',
-        ],
-    },
-    'mpnet': {
-        # Feature extraction
-        'feature-extraction': [
-            'sentence-transformers/all-mpnet-base-v2',
-            'sentence-transformers/nli-mpnet-base-v2',
-            'sentence-transformers/paraphrase-mpnet-base-v2',
-            'sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
-            'sentence-transformers/multi-qa-mpnet-base-cos-v1',
-            'sentence-transformers/multi-qa-mpnet-base-dot-v1',
-        ],
-    },
-    'mt5': {
-        # Text-to-text
-        'text2text-generation': [
-            'google/mt5-small',
-            'google/mt5-base',
-        ],
-    },
-    'nougat': {
-        # Image-to-text
-        'image-to-text': [
-            'facebook/nougat-small',
-            'facebook/nougat-base',
-        ],
-    },
-    'opt': {
-        # Text generation
-        'text-generation': [
-            # Text generation
-            'facebook/opt-125m',
-            'facebook/opt-350m',
-            # (TODO conversational)
-            'PygmalionAI/pygmalion-350m',
-        ],
-    },
-    'owlv2': {
-        # Object detection (Zero-shot object detection)
-        # NOTE: Exported with --batch_size 1
-        'zero-shot-object-detection': [
-            'google/owlv2-base-patch16',
-            'google/owlv2-base-patch16-finetuned',
-            'google/owlv2-base-patch16-ensemble',
-            # TODO: add
-            # 'google/owlv2-large-patch14',
-            # 'google/owlv2-large-patch14-finetuned',
-            # 'google/owlv2-large-patch14-ensemble',
-        ],
-    },
-    'owlvit': {
-        # Object detection (Zero-shot object detection)
-        # NOTE: Exported with --batch_size 1
-        'zero-shot-object-detection': [
-            'google/owlvit-base-patch32',
-            'google/owlvit-base-patch16',
-            'google/owlvit-large-patch14',
-        ],
-    },
-    'resnet': {
-        # Image classification
-        'image-classification': [
-            'microsoft/resnet-18',
-            'microsoft/resnet-26',
-            'microsoft/resnet-34',
-            'microsoft/resnet-50',
-            'microsoft/resnet-101',
-            'microsoft/resnet-152',
-        ],
-    },
-    'roformer': {
-        # Feature extraction
-        'feature-extraction': [
-            'hf-tiny-model-private/tiny-random-RoFormerModel',
-        ],
-
-        # Text classification
-        'text-classification': [
-            'hf-tiny-model-private/tiny-random-RoFormerForSequenceClassification',
-        ],
-
-        # Token classification
-        'token-classification': [
-            'hf-tiny-model-private/tiny-random-RoFormerForTokenClassification',
-        ],
-
-        # TODO
-        # # Text generation
-        # 'text-generation': [
-        #     'hf-tiny-model-private/tiny-random-RoFormerForCausalLM',
-        # ],
-
-        # Masked language modelling
-        'fill-mask': [
-            'alchemab/antiberta2',
-            'hf-tiny-model-private/tiny-random-RoFormerForMaskedLM',
-        ],
-
-        # Question answering
-        'question-answering': [
-            'hf-tiny-model-private/tiny-random-RoFormerForQuestionAnswering',
-        ],
-
-        # Multiple choice
-        'multiple-choice': [
-            'hf-tiny-model-private/tiny-random-RoFormerForMultipleChoice',
-        ],
-    },
-    'phi': {
-        # Text generation
-        'text-generation': [
-            'hf-internal-testing/tiny-random-PhiForCausalLM',
-            'susnato/phi-1_5_dev',
-        ],
-    },
-    'qwen2': {
-        # Text generation
-        'text-generation': [
-            'Qwen/Qwen1.5-0.5B',
-            'Qwen/Qwen1.5-0.5B-Chat',
-            'Qwen/Qwen1.5-1.8B',
-            'Qwen/Qwen1.5-1.8B-Chat',
-        ],
-    },
-    'roberta': {
-        # Feature extraction
-        'feature-extraction': [
-            'sentence-transformers/all-distilroberta-v1',
-            'sentence-transformers/all-roberta-large-v1',
-        ],
-
-        # Text classification
-        'text-classification': [
-            'roberta-large-mnli',
-        ],
-
-        # Token classification
-        'token-classification': [
-            'julien-c/EsperBERTo-small-pos',
-        ],
-
-        # Masked language modelling
-        'fill-mask': [
-            'roberta-base',
-            'distilroberta-base',
-        ],
-    },
-    'sam': {
-        # Mask generation
-        'mask-generation': [
-            # SAM
-            'facebook/sam-vit-base',
-            'facebook/sam-vit-large',
-            'facebook/sam-vit-huge',
-            'wanglab/medsam-vit-base',
-
-            # SlimSAM
-            'nielsr/slimsam-50-uniform',
-            'nielsr/slimsam-77-uniform',
-        ],
-    },
-    'segformer': {
-        # Image segmentation
-        'image-segmentation': [
-            'mattmdjaga/segformer_b0_clothes',
-            'mattmdjaga/segformer_b2_clothes',
-            'jonathandinu/face-parsing',
-
-            'nvidia/segformer-b0-finetuned-cityscapes-768-768',
-            'nvidia/segformer-b0-finetuned-cityscapes-512-1024',
-            'nvidia/segformer-b0-finetuned-cityscapes-640-1280',
-            'nvidia/segformer-b0-finetuned-cityscapes-1024-1024',
-            'nvidia/segformer-b1-finetuned-cityscapes-1024-1024',
-            'nvidia/segformer-b2-finetuned-cityscapes-1024-1024',
-            'nvidia/segformer-b3-finetuned-cityscapes-1024-1024',
-            'nvidia/segformer-b4-finetuned-cityscapes-1024-1024',
-            'nvidia/segformer-b5-finetuned-cityscapes-1024-1024',
-            'nvidia/segformer-b0-finetuned-ade-512-512',
-            'nvidia/segformer-b1-finetuned-ade-512-512',
-            'nvidia/segformer-b2-finetuned-ade-512-512',
-            'nvidia/segformer-b3-finetuned-ade-512-512',
-            'nvidia/segformer-b4-finetuned-ade-512-512',
-            'nvidia/segformer-b5-finetuned-ade-640-640',
-        ],
-
-        # Image classification
-        'image-classification': [
-            'nvidia/mit-b0',
-            'nvidia/mit-b1',
-            'nvidia/mit-b2',
-            'nvidia/mit-b3',
-            'nvidia/mit-b4',
-            'nvidia/mit-b5',
-        ],
-    },
-    'siglip': {
-        # Zero-shot image classification and feature extraction
-        # (with and without `--split_modalities`)
-        # NOTE: requires --opset 13
-        'zero-shot-image-classification': [
-            'nielsr/siglip-base-patch16-224',
-        ],
-    },
-    'speecht5': {
-        # Text-to-audio/Text-to-speech
-        'text-to-audio': [
-            'microsoft/speecht5_tts',
-        ],
-    },
-    'stablelm': {
-        # Text generation
-        'text-generation': [
-            'hf-internal-testing/tiny-random-StableLmForCausalLM',
-            'stabilityai/stablelm-2-1_6b',
-            'stabilityai/stablelm-2-zephyr-1_6b',
-        ],
-    },
-    'squeezebert': {
-        # Feature extraction
-        'feature-extraction': [
-            'squeezebert/squeezebert-uncased',
-            'squeezebert/squeezebert-mnli',
-        ],
-    },
-    'starcoder2': {
-        # Text generation
-        'text-generation': [
-            'hf-internal-testing/tiny-random-Starcoder2ForCausalLM',
-        ],
-    },
-    'swin': {
-        # Image classification
-        'image-classification': [
-            'microsoft/swin-tiny-patch4-window7-224',
-            'microsoft/swin-base-patch4-window7-224',
-            'microsoft/swin-large-patch4-window12-384-in22k',
-            'microsoft/swin-base-patch4-window7-224-in22k',
-            'microsoft/swin-base-patch4-window12-384-in22k',
-            'microsoft/swin-base-patch4-window12-384',
-            'microsoft/swin-large-patch4-window7-224',
-            'microsoft/swin-small-patch4-window7-224',
-            'microsoft/swin-large-patch4-window7-224-in22k',
-            'microsoft/swin-large-patch4-window12-384',
-        ],
-    },
-    'swin2sr': {
-        # Image-to-image (Super-resolution)
-        'image-to-image': [
-            'caidas/swin2SR-classical-sr-x2-64',
-            'caidas/swin2SR-realworld-sr-x4-64-bsrgan-psnr',
-            'caidas/swin2SR-classical-sr-x4-64',
-            'caidas/swin2SR-compressed-sr-x4-48',
-            'caidas/swin2SR-lightweight-x2-64',
-        ],
-
-        # Feature extraction
-        'feature-extraction': [
-            'hf-tiny-model-private/tiny-random-Swin2SRModel',
-        ],
-    },
-    't5': {
-        # Translation/Summarization
-        ('translation', 'summarization'): [
-            't5-small',
-            't5-base',
-            'google/t5-v1_1-small',
-            'google/t5-v1_1-base',
-            'google/flan-t5-small',
-            'google/flan-t5-base',
-        ],
-
-        # Text-to-text
-        'text2text-generation': [
-            'MBZUAI/LaMini-Flan-T5-77M',
-            'MBZUAI/LaMini-Flan-T5-248M',
-            'MBZUAI/LaMini-Flan-T5-783M',
-            'MBZUAI/LaMini-T5-61M',
-            'MBZUAI/LaMini-T5-223M',
-            'MBZUAI/LaMini-T5-738M',
-            'declare-lab/flan-alpaca-base',
-            'declare-lab/flan-alpaca-large',
-        ],
-
-        # Feature extraction
-        'feature-extraction': [
-            'sentence-transformers/sentence-t5-large',
-            'hkunlp/instructor-base',
-            'hkunlp/instructor-large',
-        ],
-    },
-    'table-transformer': {
-        # Object detection
-        'object-detection': [
-            'microsoft/table-transformer-detection',
-            'microsoft/table-transformer-structure-recognition',
-            'microsoft/table-transformer-structure-recognition-v1.1-all',
-            'microsoft/table-transformer-structure-recognition-v1.1-fin',
-            'microsoft/table-transformer-structure-recognition-v1.1-pub',
-        ],
-    },
-    'trocr': {  # NOTE: also a `vision-encoder-decoder`
-        # Text-to-image
-        'text-to-image': [
-            'microsoft/trocr-small-printed',
-            'microsoft/trocr-base-printed',
-            'microsoft/trocr-small-handwritten',
-            'microsoft/trocr-base-handwritten',
-        ],
-    },
-    'unispeech': {
-        # Feature extraction
-        'feature-extraction': [
-            # Requires --task feature-extraction
-            'microsoft/unispeech-large-1500h-cv',
-        ],
-        # TODO: add support for
-        # # Automatic speech recognition
-        # 'automatic-speech-recognition': [
-        #     'microsoft/unispeech-1350-en-353-fr-ft-1h',
-        #     'microsoft/unispeech-1350-en-17h-ky-ft-1h',
-        #     'microsoft/unispeech-1350-en-90-it-ft-1h',
-        #     'microsoft/unispeech-1350-en-168-es-ft-1h',
-        # ],
-    },
-    'unispeech-sat': {
-        # Feature extraction
-        'feature-extraction': [
-            # Requires --task feature-extraction
-            'microsoft/unispeech-sat-base',
-        ],
-
-        # Audio XVector (e.g., for speaker verification)
-        'audio-xvector': [
-            'microsoft/unispeech-sat-base-plus-sv',
-            'microsoft/unispeech-sat-base-sv',
-            'microsoft/unispeech-sat-large-sv',
-        ],
-
-        # Audio frame classification
-        'audio-frame-classification': [
-            'microsoft/unispeech-sat-base-plus-sd',
-        ],
-
-        # Automatic speech recognition
-        'automatic-speech-recognition': [
-            'microsoft/unispeech-sat-base-100h-libri-ft',
-        ],
-    },
-    'vision-encoder-decoder': {
-        # Image-to-text
-        'image-to-text': [
-            'nlpconnect/vit-gpt2-image-captioning',
-        ],
-    },
-    'vit': {
-        # Feature extraction
-        'feature-extraction': [
-            'google/vit-base-patch16-224-in21k',
-            'facebook/dino-vitb16',
-            'facebook/dino-vits8',
-            'facebook/dino-vitb8',
-            'facebook/dino-vits16',
-        ],
-        # Image classification
-        'image-classification': [
-            'google/vit-base-patch16-224',
-        ],
-    },
-    'vitmatte': {
-        # Image matting
-        'image-matting': [
-            'hustvl/vitmatte-small-distinctions-646',
-            'hustvl/vitmatte-base-distinctions-646',
-            'hustvl/vitmatte-small-composition-1k',
-            'hustvl/vitmatte-base-composition-1k',
-        ],
-    },
-    'vits': {
-        # Text-to-audio/Text-to-speech/Text-to-waveform
-        'text-to-waveform': {
-            # NOTE: requires --task text-to-waveform --skip_validation
-            'echarlaix/tiny-random-vits',
-            'facebook/mms-tts-eng',
-            'facebook/mms-tts-rus',
-            'facebook/mms-tts-hin',
-            'facebook/mms-tts-yor',
-            'facebook/mms-tts-spa',
-            'facebook/mms-tts-fra',
-            'facebook/mms-tts-ara',
-            'facebook/mms-tts-ron',
-            'facebook/mms-tts-vie',
-            'facebook/mms-tts-deu',
-            'facebook/mms-tts-kor',
-            'facebook/mms-tts-por',
-            # TODO add more checkpoints from
-            # https://huggingface.co/models?other=vits&sort=trending&search=facebook-tts
-        }
-    },
-    'wav2vec2': {
-        # Feature extraction # NOTE: requires --task feature-extraction
-        'feature-extraction': [
-            'facebook/mms-300m',
-            'facebook/mms-1b',
-        ],
-
-        # Audio classification
-        'audio-classification': [
-            'alefiury/wav2vec2-large-xlsr-53-gender-recognition-librispeech',
-            'superb/wav2vec2-base-superb-ks',
-            'facebook/mms-lid-126',
-            'facebook/mms-lid-256',
-            'facebook/mms-lid-512',
-            'facebook/mms-lid-1024',
-            'facebook/mms-lid-2048',
-            'facebook/mms-lid-4017',
-        ],
-
-        # Audio frame classification
-        'audio-frame-classification': [
-            'anton-l/wav2vec2-base-superb-sd',
-        ],
-
-        # Automatic speech recognition
-        'automatic-speech-recognition': [
-            'jonatasgrosman/wav2vec2-large-xlsr-53-english',
-            'facebook/wav2vec2-base-960h',
-            'facebook/mms-1b-l1107',
-            'facebook/mms-1b-all',
-            'facebook/mms-1b-fl102',
-        ],
-    },
-    'wav2vec2-bert': {
-        'feature-extraction': [
-            'facebook/w2v-bert-2.0',
-        ],
-
-        # Automatic speech recognition
-        'automatic-speech-recognition': [
-            'hf-audio/wav2vec2-bert-CV16-en',
-        ],
-    },
-    'wavlm': {
-        # Feature extraction
-        'feature-extraction': [
-            'microsoft/wavlm-base',
-            'microsoft/wavlm-base-plus',
-            'microsoft/wavlm-large',
-        ],
-
-        # Audio frame classification
-        'audio-frame-classification': [
-            'anton-l/wav2vec2-base-superb-sd',
-            'microsoft/wavlm-base-plus-sd',
-        ],
-
-        # Audio XVector (e.g., for speaker verification)
-        'audio-xvector': [
-            'microsoft/wavlm-base-plus-sv',
-            'microsoft/wavlm-base-sv',
-        ],
-    },
-    'whisper': {
-        # Automatic speech recognition
-        'automatic-speech-recognition': [
-            'openai/whisper-tiny',
-            'openai/whisper-tiny.en',
-            'openai/whisper-base',
-            'openai/whisper-base.en',
-            'openai/whisper-small',
-            'openai/whisper-small.en',
-            'openai/whisper-medium',
-            'openai/whisper-medium.en',
-            'openai/whisper-large',
-            'openai/whisper-large-v2',
-            'NbAiLab/nb-whisper-tiny-beta',
-            'NbAiLab/nb-whisper-base-beta',
-            'NbAiLab/nb-whisper-small-beta',
-            'NbAiLab/nb-whisper-medium-beta',
-            'NbAiLab/nb-whisper-large-beta',
-        ],
-    },
-    'xlm': {
-        # Masked language modelling
-        'fill-mask': [
-            'xlm-clm-ende-1024',
-            'xlm-mlm-ende-1024',
-            'xlm-clm-enfr-1024',
-            'xlm-mlm-enfr-1024',
-            'xlm-mlm-17-1280',
-            'xlm-mlm-100-1280',
-            'xlm-mlm-en-2048',
-            'xlm-mlm-enro-1024',
-            'xlm-mlm-tlm-xnli15-1024',
-            'xlm-mlm-xnli15-1024',
-        ],
-    },
-    'xlm-roberta': {
-        # Masked language modelling
-        'fill-mask': [
-            'xlm-roberta-base'
-        ],
-    },
-    'yolos': {
-        # Object detection
-        'object-detection': [
-            # Object detection
-            'hustvl/yolos-tiny',
-            'hustvl/yolos-small',
-            'hustvl/yolos-base',
-            'hustvl/yolos-small-dwr',
-            'hustvl/yolos-small-300',
-        ],
-    },
-}
-
-
-def main():
-    for model_type, tasks in SUPPORTED_MODELS.items():
-        for task, model_ids in tasks.items():
-            print(f'# {model_type:=^80}')
-            for model_id in model_ids:
-                print(
-                    f'python -m scripts.convert --quantize --model_id {model_id}')
-            print()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/src/backends/onnx.js b/src/backends/onnx.js
index 0bee3dce7..de89da037 100644
--- a/src/backends/onnx.js
+++ b/src/backends/onnx.js
@@ -9,42 +9,208 @@
  * 
  * This module is not directly exported, but can be accessed through the environment variables:
  * ```javascript
- * import { env } from '@xenova/transformers';
+ * import { env } from '@huggingface/transformers';
  * console.log(env.backends.onnx);
  * ```
  * 
  * @module backends/onnx
  */
 
+import { env, apis } from '../env.js';
+
 // NOTE: Import order matters here. We need to import `onnxruntime-node` before `onnxruntime-web`.
 // In either case, we select the default export if it exists, otherwise we use the named export.
 import * as ONNX_NODE from 'onnxruntime-node';
-import * as ONNX_WEB from 'onnxruntime-web';
 
-/** @type {import('onnxruntime-web')} The ONNX runtime module. */
-export let ONNX;
+// Use subpath-imports to ensure Node.js and browser interoperability.
+// See package.json and https://nodejs.org/api/packages.html#subpath-imports
+// for more information.
+// @ts-ignore
+import * as ONNX_WEB from '#onnxruntime-webgpu';
+
+export { Tensor } from 'onnxruntime-common';
+
+/**
+ * @typedef {import('onnxruntime-common').InferenceSession.ExecutionProviderConfig} ONNXExecutionProviders
+ */
+
+/** @type {Record<import("../utils/devices.js").DeviceType, ONNXExecutionProviders>} */
+const DEVICE_TO_EXECUTION_PROVIDER_MAPPING = Object.freeze({
+    auto: null, // Auto-detect based on device and environment
+    gpu: null, // Auto-detect GPU
+    cpu: 'cpu', // CPU
+    wasm: 'wasm', // WebAssembly
+    webgpu: 'webgpu', // WebGPU
+    cuda: 'cuda', // CUDA
+    dml: 'dml', // DirectML
+
+    webnn: { name: 'webnn', deviceType: 'cpu' }, // WebNN (default)
+    'webnn-npu': { name: 'webnn', deviceType: 'npu' }, // WebNN NPU
+    'webnn-gpu': { name: 'webnn', deviceType: 'gpu' }, // WebNN GPU
+    'webnn-cpu': { name: 'webnn', deviceType: 'cpu' }, // WebNN CPU
+});
 
-export const executionProviders = [
-    // 'webgpu',
-    'wasm'
-];
+/** 
+ * The list of supported devices, sorted by priority/performance.
+ * @type {import("../utils/devices.js").DeviceType[]}
+ */
+const supportedDevices = [];
+
+/** @type {ONNXExecutionProviders[]} */
+let defaultDevices;
+let ONNX;
+const ORT_SYMBOL = Symbol.for('onnxruntime');
 
-if (typeof process !== 'undefined' && process?.release?.name === 'node') {
-    // Running in a node-like environment.
+if (ORT_SYMBOL in globalThis) {
+  // If the JS runtime exposes their own ONNX runtime, use it
+  ONNX = globalThis[ORT_SYMBOL];
+
+} else if (apis.IS_NODE_ENV) {
     ONNX = ONNX_NODE.default ?? ONNX_NODE;
 
-    // Add `cpu` execution provider, with higher precedence that `wasm`.
-    executionProviders.unshift('cpu');
+    // Updated as of ONNX Runtime 1.18.0
+    // The following table lists the supported versions of ONNX Runtime Node.js binding provided with pre-built binaries.
+    // | EPs/Platforms | Windows x64 | Windows arm64 | Linux x64         | Linux arm64 | MacOS x64 | MacOS arm64 |
+    // | ------------- | ----------- | ------------- | ----------------- | ----------- | --------- | ----------- |
+    // | CPU           | ✔️          | ✔️            | ✔️                | ✔️          | ✔️        | ✔️          |
+    // | DirectML      | ✔️          | ✔️            | ❌                | ❌          | ❌        | ❌          |
+    // | CUDA          | ❌          | ❌            | ✔️ (CUDA v11.8)   | ❌          | ❌        | ❌          |
+    switch (process.platform) {
+        case 'win32': // Windows x64 and Windows arm64
+            supportedDevices.push('dml');
+            break;
+        case 'linux': // Linux x64 and Linux arm64
+            if (process.arch === 'x64') {
+                supportedDevices.push('cuda');
+            }
+            break;
+        case 'darwin': // MacOS x64 and MacOS arm64
+            break;
+    }
 
+    supportedDevices.push('cpu');
+    defaultDevices = ['cpu'];
 } else {
-    // Running in a browser-environment
-    ONNX = ONNX_WEB.default ?? ONNX_WEB;
-
-    // SIMD for WebAssembly does not operate correctly in some recent versions of iOS (16.4.x).
-    // As a temporary fix, we disable it for now.
-    // For more information, see: https://github.com/microsoft/onnxruntime/issues/15644
-    const isIOS = typeof navigator !== 'undefined' && /iP(hone|od|ad).+16_4.+AppleWebKit/.test(navigator.userAgent);
-    if (isIOS) {
-        ONNX.env.wasm.simd = false;
+    ONNX = ONNX_WEB;
+
+    if (apis.IS_WEBNN_AVAILABLE) {
+        // TODO: Only push supported providers (depending on available hardware)
+        supportedDevices.push('webnn-npu', 'webnn-gpu', 'webnn-cpu', 'webnn');
+    }
+
+    if (apis.IS_WEBGPU_AVAILABLE) {
+        supportedDevices.push('webgpu');
     }
+
+    supportedDevices.push('wasm');
+    defaultDevices = ['wasm'];
 }
+
+// @ts-ignore
+const InferenceSession = ONNX.InferenceSession;
+
+/**
+ * Map a device to the execution providers to use for the given device.
+ * @param {import("../utils/devices.js").DeviceType|"auto"|null} [device=null] (Optional) The device to run the inference on.
+ * @returns {ONNXExecutionProviders[]} The execution providers to use for the given device.
+ */
+export function deviceToExecutionProviders(device = null) {
+    // Use the default execution providers if the user hasn't specified anything
+    if (!device) return defaultDevices;
+
+    // Handle overloaded cases
+    switch (device) {
+        case "auto":
+            return supportedDevices;
+        case "gpu":
+            return supportedDevices.filter(x =>
+                ["webgpu", "cuda", "dml", "webnn-gpu"].includes(x),
+            );
+    }
+
+    if (supportedDevices.includes(device)) {
+        return [DEVICE_TO_EXECUTION_PROVIDER_MAPPING[device] ?? device];
+    }
+
+    throw new Error(`Unsupported device: "${device}". Should be one of: ${supportedDevices.join(', ')}.`)
+}
+
+
+/**
+ * To prevent multiple calls to `initWasm()`, we store the first call in a Promise
+ * that is resolved when the first InferenceSession is created. Subsequent calls
+ * will wait for this Promise to resolve before creating their own InferenceSession.
+ * @type {Promise<any>|null}
+ */
+let wasmInitPromise = null;
+
+/**
+ * Create an ONNX inference session.
+ * @param {Uint8Array} buffer The ONNX model buffer.
+ * @param {import('onnxruntime-common').InferenceSession.SessionOptions} session_options ONNX inference session options.
+ * @param {Object} session_config ONNX inference session configuration.
+ * @returns {Promise<import('onnxruntime-common').InferenceSession & { config: Object}>} The ONNX inference session.
+ */
+export async function createInferenceSession(buffer, session_options, session_config) {
+    if (wasmInitPromise) {
+        // A previous session has already initialized the WASM runtime
+        // so we wait for it to resolve before creating this new session.
+        await wasmInitPromise;
+    }
+
+    const sessionPromise = InferenceSession.create(buffer, session_options);
+    wasmInitPromise ??= sessionPromise;
+    const session = await sessionPromise;
+    session.config = session_config;
+    return session;
+}
+
+/**
+ * Check if an object is an ONNX tensor.
+ * @param {any} x The object to check
+ * @returns {boolean} Whether the object is an ONNX tensor.
+ */
+export function isONNXTensor(x) {
+    return x instanceof ONNX.Tensor;
+}
+
+/** @type {import('onnxruntime-common').Env} */
+// @ts-ignore
+const ONNX_ENV = ONNX?.env;
+if (ONNX_ENV?.wasm) {
+    // Initialize wasm backend with suitable default settings.
+
+    // (Optional) Set path to wasm files. This is needed when running in a web worker.
+    // https://onnxruntime.ai/docs/api/js/interfaces/Env.WebAssemblyFlags.html#wasmPaths
+    // We use remote wasm files by default to make it easier for newer users.
+    // In practice, users should probably self-host the necessary .wasm files.
+    ONNX_ENV.wasm.wasmPaths = `https://cdn.jsdelivr.net/npm/@huggingface/transformers@${env.version}/dist/`;
+
+    // TODO: Add support for loading WASM files from cached buffer when we upgrade to onnxruntime-web@1.19.0
+    // https://github.com/microsoft/onnxruntime/pull/21534
+
+    // Users may wish to proxy the WASM backend to prevent the UI from freezing,
+    // However, this is not necessary when using WebGPU, so we default to false.
+    ONNX_ENV.wasm.proxy = false;
+
+    // https://developer.mozilla.org/en-US/docs/Web/API/crossOriginIsolated
+    if (typeof crossOriginIsolated === 'undefined' || !crossOriginIsolated) {
+        ONNX_ENV.wasm.numThreads = 1;
+    }
+}
+
+if (ONNX_ENV?.webgpu) {
+    ONNX_ENV.webgpu.powerPreference = 'high-performance';
+}
+
+/**
+ * Check if ONNX's WASM backend is being proxied.
+ * @returns {boolean} Whether ONNX's WASM backend is being proxied.
+ */
+export function isONNXProxy() {
+    // TODO: Update this when allowing non-WASM backends.
+    return ONNX_ENV?.wasm?.proxy;
+}
+
+// Expose ONNX environment variables to `env.backends.onnx`
+env.backends.onnx = ONNX_ENV;
diff --git a/src/configs.js b/src/configs.js
index 4506d2d9c..4bc95cf80 100644
--- a/src/configs.js
+++ b/src/configs.js
@@ -6,8 +6,8 @@
  * **Example:** Load an `AutoConfig`.
  * 
  * ```javascript
- * import { AutoConfig } from '@xenova/transformers';
- * let config = await AutoConfig.from_pretrained('bert-base-uncased');
+ * import { AutoConfig } from '@huggingface/transformers';
+ * const config = await AutoConfig.from_pretrained('bert-base-uncased');
  * console.log(config);
  * // PretrainedConfig {
  * //   "model_type": "bert",
@@ -27,6 +27,7 @@
  * @module configs
  */
 
+import { pick } from './utils/core.js';
 import {
     getModelJSON,
 } from './utils/hub.js';
@@ -40,13 +41,255 @@ import {
  * Loads a config from the specified path.
  * @param {string} pretrained_model_name_or_path The path to the config directory.
  * @param {PretrainedOptions} options Additional options for loading the config.
- * @returns {Promise<Array>} A promise that resolves with information about the loaded config.
+ * @returns {Promise<Object>} A promise that resolves with information about the loaded config.
  */
 async function loadConfig(pretrained_model_name_or_path, options) {
-    let info = await getModelJSON(pretrained_model_name_or_path, 'config.json', true, options);
-    return info;
+    return await getModelJSON(pretrained_model_name_or_path, 'config.json', true, options);
 }
 
+/**
+ * 
+ * @param {PretrainedConfig} config 
+ * @returns {Object} The normalized configuration.
+ */
+function getNormalizedConfig(config) {
+    const mapping = {};
+
+    let init_normalized_config = {};
+    switch (config.model_type) {
+        // Sub-configs
+        case 'llava':
+        case 'paligemma':
+        case 'florence2':
+            init_normalized_config = getNormalizedConfig(config.text_config);
+            break;
+        case 'moondream1':
+            init_normalized_config = getNormalizedConfig(config.phi_config);
+            break;
+        case 'musicgen':
+            init_normalized_config = getNormalizedConfig(config.decoder);
+            break;
+
+        // Decoder-only models
+        case 'gpt2':
+        case 'gptj':
+        case 'jais':
+        case 'codegen':
+        case 'gpt_bigcode':
+            mapping['num_heads'] = 'n_head';
+            mapping['num_layers'] = 'n_layer';
+            mapping['hidden_size'] = 'n_embd';
+            break;
+        case 'gpt_neox':
+        case 'stablelm':
+        case 'opt':
+        case 'phi':
+        case 'phi3':
+        case 'falcon':
+            mapping['num_heads'] = 'num_attention_heads';
+            mapping['num_layers'] = 'num_hidden_layers';
+            mapping['hidden_size'] = 'hidden_size';
+            break;
+        case 'llama':
+        case 'granite':
+        case 'cohere':
+        case 'mistral':
+        case 'starcoder2':
+        case 'qwen2':
+            mapping['num_heads'] = 'num_key_value_heads';
+            mapping['num_layers'] = 'num_hidden_layers';
+            mapping['hidden_size'] = 'hidden_size';
+            mapping['num_attention_heads'] = 'num_attention_heads';
+            break;
+        case 'gemma':
+        case 'gemma2':
+            mapping['num_heads'] = 'num_key_value_heads';
+            mapping['num_layers'] = 'num_hidden_layers';
+            mapping['dim_kv'] = 'head_dim';
+            break;
+        case 'openelm':
+            mapping['num_heads'] = 'num_kv_heads';
+            mapping['num_layers'] = 'num_transformer_layers';
+            mapping['dim_kv'] = 'head_dim';
+            break;
+        case 'gpt_neo':
+        case 'donut-swin':
+            mapping['num_heads'] = 'num_heads';
+            mapping['num_layers'] = 'num_layers';
+            mapping['hidden_size'] = 'hidden_size';
+            break;
+        case 'bloom':
+            mapping['num_heads'] = 'n_head';
+            mapping['num_layers'] = 'n_layer';
+            mapping['hidden_size'] = 'hidden_size';
+            break;
+        case 'mpt':
+            mapping['num_heads'] = 'n_heads';
+            mapping['num_layers'] = 'n_layers';
+            mapping['hidden_size'] = 'd_model';
+            break;
+
+        // Encoder-decoder models
+        case 't5':
+        case 'mt5':
+        case 'longt5':
+            mapping['num_decoder_layers'] = 'num_decoder_layers';
+            mapping['num_decoder_heads'] = 'num_heads';
+            mapping['decoder_dim_kv'] = 'd_kv';
+            mapping['num_encoder_layers'] = 'num_layers';
+            mapping['num_encoder_heads'] = 'num_heads';
+            mapping['encoder_dim_kv'] = 'd_kv';
+            break;
+        case 'bart':
+        case 'mbart':
+        case 'marian':
+        case 'whisper':
+        case 'm2m_100':
+        case 'blenderbot':
+        case 'blenderbot-small':
+        case 'florence2_language':
+            mapping['num_decoder_layers'] = 'decoder_layers';
+            mapping['num_decoder_heads'] = 'decoder_attention_heads';
+            mapping['decoder_hidden_size'] = 'd_model';
+            mapping['num_encoder_layers'] = 'encoder_layers';
+            mapping['num_encoder_heads'] = 'encoder_attention_heads';
+            mapping['encoder_hidden_size'] = 'd_model';
+            break;
+        case 'speecht5':
+            mapping['num_decoder_layers'] = 'decoder_layers';
+            mapping['num_decoder_heads'] = 'decoder_attention_heads';
+            mapping['decoder_hidden_size'] = 'hidden_size';
+            mapping['num_encoder_layers'] = 'encoder_layers';
+            mapping['num_encoder_heads'] = 'encoder_attention_heads';
+            mapping['encoder_hidden_size'] = 'hidden_size';
+            break;
+        case 'trocr':
+            mapping['num_encoder_layers'] = mapping['num_decoder_layers'] = 'decoder_layers';
+            mapping['num_encoder_heads'] = mapping['num_decoder_heads'] = 'decoder_attention_heads';
+            mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'd_model';
+            break;
+        case 'musicgen_decoder':
+            mapping['num_encoder_layers'] = mapping['num_decoder_layers'] = 'num_hidden_layers';
+            mapping['num_encoder_heads'] = mapping['num_decoder_heads'] = 'num_attention_heads';
+            mapping['encoder_hidden_size'] = mapping['decoder_hidden_size'] = 'hidden_size';
+            break;
+
+        case 'vision-encoder-decoder':
+            const decoderConfig = getNormalizedConfig(config.decoder);
+
+            const add_encoder_pkv = 'num_decoder_layers' in decoderConfig;
+            const result = pick(config, ['model_type', 'is_encoder_decoder']);
+            if (add_encoder_pkv) {
+                // Decoder is part of an encoder-decoder model
+                result.num_decoder_layers = decoderConfig.num_decoder_layers;
+                result.num_decoder_heads = decoderConfig.num_decoder_heads;
+                result.decoder_hidden_size = decoderConfig.decoder_hidden_size;
+
+                result.num_encoder_layers = decoderConfig.num_encoder_layers;
+                result.num_encoder_heads = decoderConfig.num_encoder_heads;
+                result.encoder_hidden_size = decoderConfig.encoder_hidden_size;
+            } else {
+                // Decoder is a decoder-only model
+                result.num_layers = decoderConfig.num_layers;
+                result.num_heads = decoderConfig.num_heads;
+                result.hidden_size = decoderConfig.hidden_size;
+            }
+            return result;
+
+    }
+
+    // NOTE: If `num_attention_heads` is not set, it is assumed to be equal to `num_heads`
+    const normalized_config = {
+        ...init_normalized_config,
+        ...pick(config, ['model_type', 'multi_query', 'is_encoder_decoder']),
+    };
+    for (const key in mapping) {
+        normalized_config[key] = config[mapping[key]];
+    }
+    return normalized_config;
+}
+
+/**
+ * 
+ * @param {PretrainedConfig} config 
+ * @returns {Record<string, number[]>}
+ */
+export function getKeyValueShapes(config, {
+    prefix = 'past_key_values',
+} = {}) {
+    /** @type {Record<string, number[]>} */
+    const decoderFeeds = {};
+    const normalized_config = config.normalized_config;
+
+    // TODO support batches (i.e., batch_size > 1)
+    const batch_size = 1;
+
+    if (normalized_config.is_encoder_decoder && (
+        'num_encoder_heads' in normalized_config && 'num_decoder_heads' in normalized_config
+    )) {
+        const encoder_dim_kv = normalized_config.encoder_dim_kv ?? (
+            normalized_config.encoder_hidden_size / normalized_config.num_encoder_heads
+        );
+        const decoder_dim_kv = normalized_config.decoder_dim_kv ?? (
+            normalized_config.decoder_hidden_size / normalized_config.num_decoder_heads
+        );
+
+        const encoder_dims = [batch_size, normalized_config.num_encoder_heads, 0, encoder_dim_kv];
+        const decoder_dims = [batch_size, normalized_config.num_decoder_heads, 0, decoder_dim_kv];
+        for (let i = 0; i < normalized_config.num_decoder_layers; ++i) {
+            decoderFeeds[`${prefix}.${i}.encoder.key`] = encoder_dims;
+            decoderFeeds[`${prefix}.${i}.encoder.value`] = encoder_dims;
+            decoderFeeds[`${prefix}.${i}.decoder.key`] = decoder_dims;
+            decoderFeeds[`${prefix}.${i}.decoder.value`] = decoder_dims;
+        }
+    } else { // Decoders
+        const num_heads = normalized_config.num_heads;
+        const num_layers = normalized_config.num_layers;
+        const dim_kv = normalized_config.dim_kv ?? (
+            normalized_config.hidden_size /
+            (normalized_config.num_attention_heads ?? num_heads)
+        );
+
+        if (normalized_config.model_type === 'falcon') {
+            // NOTE: Custom implementation for Falcon
+            const dims = [batch_size * num_heads, 0, dim_kv]
+            for (let i = 0; i < num_layers; ++i) {
+                decoderFeeds[`${prefix}.${i}.key`] = dims;
+                decoderFeeds[`${prefix}.${i}.value`] = dims;
+            }
+        } else if (normalized_config.multi_query) { // e.g., for `gpt_bigcode`
+            const dims = [batch_size * num_heads, 0, 2 * dim_kv]
+
+            for (let i = 0; i < num_layers; ++i) {
+                decoderFeeds[`${prefix}.${i}.key_value`] = dims;
+            }
+        } else if (normalized_config.model_type === 'bloom') {
+            // NOTE: Custom implementation for Bloom
+
+            const keyDims = [batch_size * num_heads, dim_kv, 0] // [batch_size x num_heads,64,past_sequence_length]
+            const valueDims = [batch_size * num_heads, 0, dim_kv] // [batch_size x num_heads,past_sequence_length,64]
+            for (let i = 0; i < num_layers; ++i) {
+                decoderFeeds[`${prefix}.${i}.key`] = keyDims;
+                decoderFeeds[`${prefix}.${i}.value`] = valueDims;
+            }
+        } else if (normalized_config.model_type === 'openelm') {
+            for (let i = 0; i < num_layers; ++i) {
+                const dims = [batch_size, num_heads[i], 0, dim_kv]
+
+                decoderFeeds[`${prefix}.${i}.key`] = dims;
+                decoderFeeds[`${prefix}.${i}.value`] = dims;
+            }
+        } else { // Decoder-only
+            const dims = [batch_size, num_heads, 0, dim_kv]
+            for (let i = 0; i < num_layers; ++i) {
+                decoderFeeds[`${prefix}.${i}.key`] = dims;
+                decoderFeeds[`${prefix}.${i}.value`] = dims;
+            }
+        }
+    }
+
+    return decoderFeeds;
+}
 /**
  * Base class for all configuration classes. For more information, see the corresponding
  * [Python documentation](https://huggingface.co/docs/transformers/main/en/main_classes/configuration#transformers.PretrainedConfig).
@@ -54,15 +297,25 @@ async function loadConfig(pretrained_model_name_or_path, options) {
 export class PretrainedConfig {
     // NOTE: Typo in original
 
+    /** @type {string|null} */
+    model_type = null;
+
+    /** @type {boolean} */
+    is_encoder_decoder = false;
+
+    /** @type {number} */
+    max_position_embeddings;
+
+    /** @type {TransformersJSConfig} */
+    'transformers.js_config';
+
     /**
      * Create a new PreTrainedTokenizer instance.
      * @param {Object} configJSON The JSON of the config.
      */
     constructor(configJSON) {
-        this.model_type = null;
-        this.is_encoder_decoder = false;
-
         Object.assign(this, configJSON);
+        this.normalized_config = getNormalizedConfig(this);
     }
 
     /**
@@ -81,8 +334,11 @@ export class PretrainedConfig {
         local_files_only = false,
         revision = 'main',
     } = {}) {
+        if (config && !(config instanceof PretrainedConfig)) {
+            config = new PretrainedConfig(config);
+        }
 
-        let data = config ?? await loadConfig(pretrained_model_name_or_path, {
+        const data = config ?? await loadConfig(pretrained_model_name_or_path, {
             progress_callback,
             config,
             cache_dir,
@@ -97,11 +353,23 @@ export class PretrainedConfig {
  * Helper class which is used to instantiate pretrained configs with the `from_pretrained` function.
  * 
  * @example
- * let config = await AutoConfig.from_pretrained('bert-base-uncased'); 
+ * const config = await AutoConfig.from_pretrained('Xenova/bert-base-uncased'); 
  */
 export class AutoConfig {
-    /** @type {PretrainedConfig.from_pretrained} */
+    /** @type {typeof PretrainedConfig.from_pretrained} */
     static async from_pretrained(...args) {
         return PretrainedConfig.from_pretrained(...args);
     }
 }
+
+/**
+ * Transformers.js-specific configuration, possibly present in config.json under the key `transformers.js_config`.
+ * @typedef {Object} TransformersJSConfig
+ * @property {import('./utils/tensor.js').DataType|Record<import('./utils/dtypes.js').DataType, import('./utils/tensor.js').DataType>} [kv_cache_dtype] The data type of the key-value cache.
+ * @property {Record<string, number>} [free_dimension_overrides] Override the free dimensions of the model.
+ * See https://onnxruntime.ai/docs/tutorials/web/env-flags-and-session-options.html#freedimensionoverrides
+ * for more information.
+ * @property {import('./utils/devices.js').DeviceType} [device] The default device to use for the model.
+ * @property {import('./utils/dtypes.js').DataType} [dtype] The default data type to use for the model.
+ * @property {boolean|Record<string, boolean>} [use_external_data_format=false] Whether to load the model using the external data format (used for models >= 2GB in size).
+ */
diff --git a/src/env.js b/src/env.js
index 2ed670021..5e604efdb 100644
--- a/src/env.js
+++ b/src/env.js
@@ -3,19 +3,19 @@
  * 
  * **Example:** Disable remote models.
  * ```javascript
- * import { env } from '@xenova/transformers';
+ * import { env } from '@huggingface/transformers';
  * env.allowRemoteModels = false;
  * ```
  * 
  * **Example:** Set local model path.
  * ```javascript
- * import { env } from '@xenova/transformers';
+ * import { env } from '@huggingface/transformers';
  * env.localModelPath = '/path/to/local/models/';
  * ```
  * 
  * **Example:** Set cache directory.
  * ```javascript
- * import { env } from '@xenova/transformers';
+ * import { env } from '@huggingface/transformers';
  * env.cacheDir = '/path/to/cache/directory/';
  * ```
  * 
@@ -26,19 +26,53 @@ import fs from 'fs';
 import path from 'path';
 import url from 'url';
 
-import { ONNX } from './backends/onnx.js';
-const { env: onnx_env } = ONNX;
-
-const VERSION = '2.17.2';
+const VERSION = '3.0.0';
 
 // Check if various APIs are available (depends on environment)
-const WEB_CACHE_AVAILABLE = typeof self !== 'undefined' && 'caches' in self;
-const FS_AVAILABLE = !isEmpty(fs); // check if file system is available
-const PATH_AVAILABLE = !isEmpty(path); // check if path is available
+const IS_BROWSER_ENV = typeof self !== 'undefined';
+const IS_WEBWORKER_ENV = IS_BROWSER_ENV && self.constructor.name === 'DedicatedWorkerGlobalScope';
+const IS_WEB_CACHE_AVAILABLE = IS_BROWSER_ENV && 'caches' in self;
+const IS_WEBGPU_AVAILABLE = typeof navigator !== 'undefined' && 'gpu' in navigator;
+const IS_WEBNN_AVAILABLE = typeof navigator !== 'undefined' && 'ml' in navigator;
+
+const IS_PROCESS_AVAILABLE = typeof process !== 'undefined';
+const IS_NODE_ENV = IS_PROCESS_AVAILABLE && process?.release?.name === 'node';
+const IS_FS_AVAILABLE = !isEmpty(fs);
+const IS_PATH_AVAILABLE = !isEmpty(path);
+
+/**
+ * A read-only object containing information about the APIs available in the current environment.
+ */
+export const apis = Object.freeze({
+    /** Whether we are running in a browser environment */
+    IS_BROWSER_ENV,
+
+    /** Whether we are running in a web worker environment */
+    IS_WEBWORKER_ENV,
+
+    /** Whether the Cache API is available */
+    IS_WEB_CACHE_AVAILABLE,
+
+    /** Whether the WebGPU API is available */
+    IS_WEBGPU_AVAILABLE,
+
+    /** Whether the WebNN API is available */
+    IS_WEBNN_AVAILABLE,
+
+    /** Whether the Node.js process API is available */
+    IS_PROCESS_AVAILABLE,
+
+    /** Whether we are running in a Node.js environment */
+    IS_NODE_ENV,
 
-const RUNNING_LOCALLY = FS_AVAILABLE && PATH_AVAILABLE;
+    /** Whether the filesystem API is available */
+    IS_FS_AVAILABLE,
 
-// __dirname is reserved so we use dirname__ instead.
+    /** Whether the path API is available */
+    IS_PATH_AVAILABLE,
+});
+
+const RUNNING_LOCALLY = IS_FS_AVAILABLE && IS_PATH_AVAILABLE;
 const dirname__ = RUNNING_LOCALLY
     ? path.dirname(path.dirname(url.fileURLToPath(import.meta.url)))
     : './';
@@ -54,27 +88,17 @@ const localModelPath = RUNNING_LOCALLY
     ? path.join(dirname__, DEFAULT_LOCAL_MODEL_PATH)
     : DEFAULT_LOCAL_MODEL_PATH;
 
-if (onnx_env?.wasm) {
-    // Set path to wasm files. This is needed when running in a web worker.
-    // https://onnxruntime.ai/docs/api/js/interfaces/Env.WebAssemblyFlags.html#wasmPaths
-    // We use remote wasm files by default to make it easier for newer users.
-    // In practice, users should probably self-host the necessary .wasm files.
-    onnx_env.wasm.wasmPaths = RUNNING_LOCALLY
-        ? path.join(dirname__, '/dist/')
-        : `https://cdn.jsdelivr.net/npm/@xenova/transformers@${VERSION}/dist/`;
-}
-
 /**
- * Global variable used to control execution. This provides users a simple way to configure Transformers.js.
- * @property {Object} backends Expose environment variables of different backends,
- * allowing users to set these variables if they want to.
- * @property {string} __dirname Directory name of module. Useful for resolving local paths.
+ * Global variable given visible to users to control execution. This provides users a simple way to configure Transformers.js.
+ * @typedef {Object} TransformersEnvironment
  * @property {string} version This version of Transformers.js.
+ * @property {{onnx: Partial<import('onnxruntime-common').Env>}} backends Expose environment variables of different backends,
+ * allowing users to set these variables if they want to.
  * @property {boolean} allowRemoteModels Whether to allow loading of remote files, defaults to `true`.
  * If set to `false`, it will have the same effect as setting `local_files_only=true` when loading pipelines, models, tokenizers, processors, etc.
  * @property {string} remoteHost Host URL to load models from. Defaults to the Hugging Face Hub.
  * @property {string} remotePathTemplate Path template to fill in and append to `remoteHost` when loading models.
- * @property {boolean} allowLocalModels Whether to allow loading of local files, defaults to `true`.
+ * @property {boolean} allowLocalModels Whether to allow loading of local files, defaults to `false` if running in-browser, and `true` otherwise.
  * If set to `false`, it will skip the local file check and try to load the model from the remote host.
  * @property {string} localModelPath Path to load local models from. Defaults to `/models/`.
  * @property {boolean} useFS Whether to use the file system to load files. By default, it is `true` if available.
@@ -85,32 +109,31 @@ if (onnx_env?.wasm) {
  * @property {Object} customCache The custom cache to use. Defaults to `null`. Note: this must be an object which
  * implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache
  */
+
+/** @type {TransformersEnvironment} */
 export const env = {
+    version: VERSION,
+
     /////////////////// Backends settings ///////////////////
+    // NOTE: These will be populated later by the backends themselves.
     backends: {
         // onnxruntime-web/onnxruntime-node
-        onnx: onnx_env,
-
-        // TensorFlow.js
-        tfjs: {},
+        onnx: {},
     },
 
-    __dirname: dirname__,
-    version: VERSION,
-
     /////////////////// Model settings ///////////////////
     allowRemoteModels: true,
     remoteHost: 'https://huggingface.co/',
     remotePathTemplate: '{model}/resolve/{revision}/',
 
-    allowLocalModels: true,
+    allowLocalModels: !IS_BROWSER_ENV,
     localModelPath: localModelPath,
-    useFS: FS_AVAILABLE,
+    useFS: IS_FS_AVAILABLE,
 
     /////////////////// Cache settings ///////////////////
-    useBrowserCache: WEB_CACHE_AVAILABLE,
+    useBrowserCache: IS_WEB_CACHE_AVAILABLE,
 
-    useFSCache: FS_AVAILABLE,
+    useFSCache: IS_FS_AVAILABLE,
     cacheDir: DEFAULT_CACHE_DIR,
 
     useCustomCache: false,
diff --git a/src/generation/configuration_utils.js b/src/generation/configuration_utils.js
new file mode 100644
index 000000000..33a6fbe81
--- /dev/null
+++ b/src/generation/configuration_utils.js
@@ -0,0 +1,381 @@
+
+/**
+ * @module generation/configuration_utils
+ */
+
+import { pick } from "../utils/core.js";
+
+/**
+ * Class that holds a configuration for a generation task.
+ */
+export class GenerationConfig {
+    // Parameters that control the length of the output
+    /**
+     * The maximum length the generated tokens can have.
+     * Corresponds to the length of the input prompt + `max_new_tokens`.
+     * Its effect is overridden by `max_new_tokens`, if also set.
+     * @type {number}
+     * @default 20
+     */
+    max_length = 20;
+
+    /**
+     * The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
+     * @type {number}
+     * @default null
+     */
+    max_new_tokens = null;
+
+    /**
+     * The minimum length of the sequence to be generated.
+     * Corresponds to the length of the input prompt + `min_new_tokens`.
+     * Its effect is overridden by `min_new_tokens`, if also set.
+     * @type {number}
+     * @default 0
+     */
+    min_length = 0;
+
+    /**
+     * The minimum numbers of tokens to generate, ignoring the number of tokens in the prompt.
+     * @type {number}
+     * @default null
+     */
+    min_new_tokens = null;
+
+    /**
+     * Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values:
+     * - `true`, where the generation stops as soon as there are `num_beams` complete candidates;
+     * - `false`, where an heuristic is applied and the generation stops when is it very unlikely to find better candidates;
+     * - `"never"`, where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm).
+     * @type {boolean|"never"}
+     * @default false
+     */
+    early_stopping = false;
+
+    /**
+     * The maximum amount of time you allow the computation to run for in seconds.
+     * Generation will still finish the current pass after allocated time has been passed.
+     * @type {number}
+     * @default null
+     */
+    max_time = null;
+
+    // Parameters that control the generation strategy used
+    /**
+     * Whether or not to use sampling; use greedy decoding otherwise.
+     * @type {boolean}
+     * @default false
+     */
+    do_sample = false;
+
+    /**
+     * Number of beams for beam search. 1 means no beam search.
+     * @type {number}
+     * @default 1
+     */
+    num_beams = 1;
+
+    /**
+     * Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams.
+     * See [this paper](https://arxiv.org/pdf/1610.02424.pdf) for more details.
+     * @type {number}
+     * @default 1
+     */
+    num_beam_groups = 1;
+
+    /**
+     * The values balance the model confidence and the degeneration penalty in contrastive search decoding.
+     * @type {number}
+     * @default null
+     */
+    penalty_alpha = null;
+
+    /**
+     * Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding.
+     * @type {boolean}
+     * @default true
+     */
+    use_cache = true;
+
+    // Parameters for manipulation of the model output logits
+    /**
+     * The value used to modulate the next token probabilities.
+     * @type {number}
+     * @default 1.0
+     */
+    temperature = 1.0;
+
+    /**
+     * The number of highest probability vocabulary tokens to keep for top-k-filtering.
+     * @type {number}
+     * @default 50
+     */
+    top_k = 50;
+
+    /**
+     * If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation.
+     * @type {number}
+     * @default 1.0
+     */
+    top_p = 1.0;
+
+    /**
+     * Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated.
+     * If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to `typical_p` or higher are kept for generation.
+     * See [this paper](https://arxiv.org/pdf/2202.00666.pdf) for more details.
+     * @type {number}
+     * @default 1.0
+     */
+    typical_p = 1.0;
+
+    /**
+     * If set to float strictly between 0 and 1, only tokens with a conditional probability greater than `epsilon_cutoff` will be sampled.
+     * In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model.
+     * See [Truncation Sampling as Language Model Desmoothing](https://arxiv.org/abs/2210.15191) for more details.
+     * @type {number}
+     * @default 0.0
+     */
+    epsilon_cutoff = 0.0;
+
+    /**
+     * Eta sampling is a hybrid of locally typical sampling and epsilon sampling.
+     * If set to float strictly between 0 and 1, a token is only considered if it is greater than either `eta_cutoff` or `sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits)))`.
+     * The latter term is intuitively the expected next token probability, scaled by `sqrt(eta_cutoff)`. In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
+     * See [Truncation Sampling as Language Model Desmoothing](https://arxiv.org/abs/2210.15191) for more details.
+     * @type {number}
+     * @default 0.0
+     */
+    eta_cutoff = 0.0;
+
+    /**
+     * This value is subtracted from a beam's score if it generates a token same as any beam from other group at a particular time.
+     * Note that `diversity_penalty` is only effective if `group beam search` is enabled.
+     * @type {number}
+     * @default 0.0
+     */
+    diversity_penalty = 0.0;
+
+    /**
+     * The parameter for repetition penalty. 1.0 means no penalty.
+     * See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+     * @type {number}
+     * @default 1.0
+     */
+    repetition_penalty = 1.0;
+
+    /**
+     * The paramater for encoder_repetition_penalty.
+     * An exponential penalty on sequences that are not in the original input.
+     * 1.0 means no penalty.
+     * @type {number}
+     * @default 1.0
+     */
+    encoder_repetition_penalty = 1.0;
+
+    /**
+     * Exponential penalty to the length that is used with beam-based generation.
+     * It is applied as an exponent to the sequence length, which in turn is used to divide the score of the sequence.
+     * Since the score is the log likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while `length_penalty` < 0.0 encourages shorter sequences.
+     * @type {number}
+     * @default 1.0
+     */
+    length_penalty = 1.0;
+
+    /**
+     * If set to int > 0, all ngrams of that size can only occur once.
+     * @type {number}
+     * @default 0
+     */
+    no_repeat_ngram_size = 0;
+
+    /**
+     * List of token ids that are not allowed to be generated.
+     * In order to get the token ids of the words that should not appear in the generated text, use
+     * `tokenizer(bad_words, { add_prefix_space: true, add_special_tokens: false }).input_ids`.
+     * @type {number[][]}
+     * @default null
+     */
+    bad_words_ids = null;
+
+    /**
+     * List of token ids that must be generated.
+     * If given a `number[][]`, this is treated as a simple list of words that must be included, the opposite to `bad_words_ids`.
+     * If given `number[][][]`, this triggers a [disjunctive constraint](https://github.com/huggingface/transformers/issues/14081), where one can allow different forms of each word.
+     * @type {number[][]|number[][][]}
+     * @default null
+     */
+    force_words_ids = null;
+
+    /**
+     * Whether to renormalize the logits after applying all the logits processors or warpers (including the custom ones).
+     * It's highly recommended to set this flag to `true` as the search algorithms suppose the score logits are normalized but some logit processors or warpers break the normalization.
+     * @type {boolean}
+     * @default false
+     */
+    renormalize_logits = false;
+
+    /**
+     * Custom constraints that can be added to the generation to ensure that the output will contain the use of certain tokens as defined by `Constraint` objects, in the most sensible way possible.
+     * @type {Object[]}
+     * @default null
+     */
+    constraints = null;
+
+    /**
+     * The id of the token to force as the first generated token after the `decoder_start_token_id`.
+     * Useful for multilingual models like mBART where the first generated token needs to be the target language token.
+     * @type {number}
+     * @default null
+     */
+    forced_bos_token_id = null;
+
+    /**
+     * The id of the token to force as the last generated token when `max_length` is reached.
+     * Optionally, use a list to set multiple *end-of-sequence* tokens.
+     * @type {number|number[]}
+     * @default null
+     */
+    forced_eos_token_id = null;
+
+    /**
+     * Whether to remove possible *nan* and *inf* outputs of the model to prevent the generation method to crash. Note that using `remove_invalid_values` can slow down generation.
+     * @type {boolean}
+     */
+    remove_invalid_values = false;
+
+    /**
+     * This Tuple adds an exponentially increasing length penalty, after a certain amount of tokens have been generated.
+     * The tuple shall consist of: `(start_index, decay_factor)` where `start_index` indicates where penalty starts and `decay_factor` represents the factor of exponential decay.
+     * @type {[number, number]}
+     * @default null
+     */
+    exponential_decay_length_penalty = null;
+
+    /**
+     * A list of tokens that will be suppressed at generation.
+     * The `SuppressTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
+     * @type {number[]}
+     * @default null
+     */
+    suppress_tokens = null;
+
+    /**
+     * A list of tokens that will be suppressed at the beginning of the generation.
+     * The `SuppressBeginTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
+     * @type {number[]}
+     * @default null
+     */
+    begin_suppress_tokens = null;
+
+    /**
+     * A list of pairs of integers which indicates a mapping from generation indices to token indices that will be forced before sampling.
+     * For example, `[[1, 123]]` means the second generated token will always be a token of index 123.
+     * @type {[number, number][]}
+     * @default null
+     */
+    forced_decoder_ids = null;
+
+    /**
+     * The guidance scale for classifier free guidance (CFG). CFG is enabled by setting `guidance_scale > 1`.
+     * Higher guidance scale encourages the model to generate samples that are more closely linked to the input
+     * prompt, usually at the expense of poorer quality.
+     * @type {number}
+     * @default null
+     */
+    guidance_scale = null;
+
+    // Parameters that define the output variables of `generate`
+    /**
+     * The number of independently computed returned sequences for each element in the batch.
+     * @type {number}
+     * @default 1
+     */
+    num_return_sequences = 1;
+
+    /**
+     * Whether or not to return the attentions tensors of all attention layers.
+     * See `attentions` under returned tensors for more details.
+     * @type {boolean}
+     * @default false
+     */
+    output_attentions = false;
+
+    /**
+     * Whether or not to return the hidden states of all layers.
+     * See `hidden_states` under returned tensors for more details.
+     * @type {boolean}
+     * @default false
+     */
+    output_hidden_states = false;
+
+    /**
+     * Whether or not to return the prediction scores.
+     * See `scores` under returned tensors for more details.
+     * @type {boolean}
+     * @default false
+     */
+    output_scores = false;
+
+    /**
+     * Whether or not to return a `ModelOutput` instead of a plain tuple.
+     * @type {boolean}
+     * @default false
+     */
+    return_dict_in_generate = false;
+
+    // Special tokens that can be used at generation time
+    /**
+     * The id of the *padding* token.
+     * @type {number}
+     * @default null
+     */
+    pad_token_id = null;
+
+    /**
+     * The id of the *beginning-of-sequence* token.
+     * @type {number}
+     * @default null
+     */
+    bos_token_id = null;
+
+    /**
+     * The id of the *end-of-sequence* token.
+     * Optionally, use a list to set multiple *end-of-sequence* tokens.
+     * @type {number|number[]}
+     * @default null
+     */
+    eos_token_id = null;
+
+    // Generation parameters exclusive to encoder-decoder models
+    /**
+     * If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the `decoder_input_ids`.
+     * @type {number}
+     * @default 0
+     */
+    encoder_no_repeat_ngram_size = 0;
+
+    /**
+     * If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token.
+     * @type {number}
+     * @default null
+     */
+    decoder_start_token_id = null;
+
+    // Wild card
+    /**
+     * Additional generation kwargs will be forwarded to the `generate` function of the model.
+     * Kwargs that are not present in `generate`'s signature will be used in the model forward pass.
+     * @type {Object}
+     * @default {}
+     */
+    generation_kwargs = {};
+
+    /**
+     * 
+     * @param {GenerationConfig|import('../configs.js').PretrainedConfig} config 
+     */
+    constructor(config) {
+        Object.assign(this, pick(config, Object.getOwnPropertyNames(this)));
+    }
+}
+
diff --git a/src/generation/logits_process.js b/src/generation/logits_process.js
new file mode 100644
index 000000000..732af4f3f
--- /dev/null
+++ b/src/generation/logits_process.js
@@ -0,0 +1,719 @@
+
+/**
+ * @module generation/logits_process
+ */
+
+import { Callable } from "../utils/generic.js";
+import { Tensor } from "../utils/tensor.js";
+
+import { max, log_softmax } from "../utils/maths.js";
+
+/**
+ * Abstract base class for all logit processors that can be applied during generation.
+ */
+export class LogitsProcessor extends Callable {
+    /**
+     * Apply the processor to the input logits.
+     *
+     * @abstract
+     * @param {bigint[][]} input_ids The input ids.
+     * @param {Tensor} logits The logits to process.
+     * @throws {Error} Throws an error if `_call` is not implemented in the subclass.
+     */
+    _call(input_ids, logits) {
+        throw Error("`_call` should be implemented in a subclass")
+    }
+}
+
+
+/**
+ * Abstract base class for all logit warpers that can be applied during generation with multinomial sampling.
+ */
+export class LogitsWarper extends Callable {
+    /**
+     * Apply the processor to the input logits.
+     *
+     * @abstract
+     * @param {bigint[][]} input_ids The input ids.
+     * @param {Tensor} logits The logits to process.
+     * @throws {Error} Throws an error if `_call` is not implemented in the subclass.
+     */
+    _call(input_ids, logits) {
+        throw Error("`_call` should be implemented in a subclass")
+    }
+}
+
+
+/**
+ * A class representing a list of logits processors. A logits processor is a function that modifies the logits
+ * output of a language model. This class provides methods for adding new processors and applying all processors to a
+ * batch of logits.
+ */
+export class LogitsProcessorList extends Callable {
+    /**
+     * Constructs a new instance of `LogitsProcessorList`.
+     */
+    constructor() {
+        super();
+        this.processors = [];
+    }
+
+    /**
+     * Adds a new logits processor to the list.
+     *
+     * @param {LogitsProcessor} item The logits processor function to add.
+     */
+    push(item) {
+        this.processors.push(item);
+    }
+
+    /**
+     * Adds multiple logits processors to the list.
+     *
+     * @param {LogitsProcessor[]} items The logits processor functions to add.
+     */
+    extend(items) {
+        this.processors.push(...items);
+    }
+
+    /**
+     * Applies all logits processors in the list to a batch of logits, modifying them in-place.
+     *
+     * @param {bigint[][]} input_ids The input IDs for the language model.
+     * @param {Tensor} logits
+     */
+    _call(input_ids, logits) {
+        let toReturn = logits;
+        // NOTE: Most processors modify logits inplace
+        for (const processor of this.processors) {
+            toReturn = processor(input_ids, toReturn);
+        }
+        return toReturn;
+    }
+
+    [Symbol.iterator]() {
+        return this.processors.values();
+    }
+}
+
+// DEPRECATED: https://github.com/huggingface/transformers/pull/29485
+// /**
+//  * A logits processor that forces a specific token to be generated by the decoder.
+//  */
+// export class ForceTokensLogitsProcessor extends LogitsProcessor {
+//     /**
+//      * Constructs a new instance of `ForceTokensLogitsProcessor`.
+//      * 
+//      * @param {[number, number][]} forced_decoder_ids The ids of tokens that should be forced.
+//      */
+//     constructor(forced_decoder_ids) {
+//         super();
+//         // TODO: convert to `new Map(forced_decoder_ids)`
+//         this.force_token_map = Object.fromEntries(forced_decoder_ids ?? []);
+//     }
+
+//     /**
+//      * Apply the processor to the input logits.
+//      *
+//      * @param {bigint[][]} input_ids The input ids.
+//      * @param {Tensor} logits The logits to process.
+//      * @returns {Tensor} The processed logits.
+//      */
+//     _call(input_ids, logits) {
+//         console.log('this.force_token_map', this.force_token_map)
+//         console.log('call ForceTokensLogitsProcessor', input_ids, logits)
+//         console.log('input_ids.length', input_ids.length)
+//         let map = this.force_token_map[input_ids.length];
+//         if (map) { // There exists a mapping
+//             logits.data.fill(-Infinity)
+//             logits.data[map] = 0;
+//         }
+//         console.log('map', map)
+//         // throw Error("Not implemented")
+//         return logits;
+//     }
+// }
+
+/**
+ * A LogitsProcessor that forces a BOS token at the beginning of the generated sequence.
+ */
+export class ForcedBOSTokenLogitsProcessor extends LogitsProcessor {
+    /**
+     * Create a ForcedBOSTokenLogitsProcessor.
+     * @param {number} bos_token_id The ID of the beginning-of-sequence token to be forced.
+     */
+    constructor(bos_token_id) {
+        super();
+        this.bos_token_id = bos_token_id;
+    }
+
+    /**
+     * Apply the BOS token forcing to the logits.
+     * @param {bigint[][]} input_ids The input IDs.
+     * @param {Tensor} logits The logits.
+     * @returns {Object} The logits with BOS token forcing.
+     */
+    _call(input_ids, logits) {
+        for (let i = 0; i < input_ids.length; ++i) {
+            if (input_ids[i].length === 1) {
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+                batch_logits_data.fill(-Infinity);
+                batch_logits_data[this.bos_token_id] = 0;
+            }
+        }
+        return logits;
+    }
+}
+
+/**
+ * A logits processor that enforces the specified token as the last generated token when `max_length` is reached.
+ */
+export class ForcedEOSTokenLogitsProcessor extends LogitsProcessor {
+    /**
+     * Create a ForcedEOSTokenLogitsProcessor.
+     * @param {number} max_length The maximum length of the sequence to be generated.
+     * @param {number|number[]} eos_token_id The id(s) of the *end-of-sequence* token.
+     */
+    constructor(max_length, eos_token_id) {
+        super();
+        this.max_length = max_length;
+        this.eos_token_id = Array.isArray(eos_token_id) ? eos_token_id : [eos_token_id];
+    }
+
+    /**
+     * Apply the processor to input_ids and logits.
+     * 
+     * @param {bigint[][]} input_ids The input ids.
+     * @param {Tensor} logits The logits tensor.
+     */
+    _call(input_ids, logits) {
+        for (let i = 0; i < input_ids.length; ++i) {
+            if (input_ids[i].length === this.max_length - 1) {
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+                batch_logits_data.fill(-Infinity);
+                for (const eos_token of this.eos_token_id) {
+                    batch_logits_data[eos_token] = 0;
+                }
+            }
+        }
+        return logits;
+    }
+}
+
+/**
+ * A LogitsProcessor that suppresses a list of tokens as soon as the `generate` function starts
+ * generating using `begin_index` tokens. This should ensure that the tokens defined by
+ * `begin_suppress_tokens` at not sampled at the begining of the generation.
+ */
+export class SuppressTokensAtBeginLogitsProcessor extends LogitsProcessor {
+    /**
+     * Create a SuppressTokensAtBeginLogitsProcessor.
+     * @param {number[]} begin_suppress_tokens The IDs of the tokens to suppress.
+     * @param {number} begin_index The number of tokens to generate before suppressing tokens.
+     */
+    constructor(begin_suppress_tokens, begin_index) {
+        super();
+        this.begin_suppress_tokens = begin_suppress_tokens;
+        this.begin_index = begin_index;
+    }
+
+    /**
+     * Apply the BOS token forcing to the logits.
+     * @param {bigint[][]} input_ids The input IDs.
+     * @param {Tensor} logits The logits.
+     * @returns {Object} The logits with BOS token forcing.
+     */
+    _call(input_ids, logits) {
+        for (let i = 0; i < input_ids.length; ++i) {
+            if (input_ids[i].length === this.begin_index) {
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+                for (const token_id of this.begin_suppress_tokens) {
+                    batch_logits_data[token_id] = -Infinity;
+                }
+            }
+        }
+        return logits;
+    }
+}
+
+/**
+ * A LogitsProcessor that handles adding timestamps to generated text.
+ */
+export class WhisperTimeStampLogitsProcessor extends LogitsProcessor {
+    /**
+     * Constructs a new WhisperTimeStampLogitsProcessor.
+     * @param {import('../models/whisper/generation_whisper.js').WhisperGenerationConfig} generate_config The config object passed to the `generate()` method of a transformer model.
+     * @param {number[]} init_tokens The initial tokens of the input sequence.
+     */
+    constructor(generate_config, init_tokens) {
+        super();
+        this.eos_token_id =
+            Array.isArray(generate_config.eos_token_id)
+                ? generate_config.eos_token_id[0]
+                : generate_config.eos_token_id;
+
+        this.no_timestamps_token_id = generate_config.no_timestamps_token_id;
+        this.timestamp_begin = this.no_timestamps_token_id + 1;
+
+        this.begin_index = init_tokens.length;
+        if (init_tokens.at(-1) === this.no_timestamps_token_id) {
+            this.begin_index -= 1;
+        }
+        this.max_initial_timestamp_index = generate_config.max_initial_timestamp_index;
+    }
+
+    /**
+     * Modify the logits to handle timestamp tokens.
+     * @param {bigint[][]} input_ids The input sequence of tokens.
+     * @param {Tensor} logits The logits output by the model.
+     * @returns {Tensor} The modified logits.
+     */
+    _call(input_ids, logits) {
+        for (let i = 0; i < input_ids.length; ++i) {
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+
+            // suppress <|notimestamps|> which is handled by without_timestamps
+            batch_logits_data[this.no_timestamps_token_id] = -Infinity;
+
+            if (input_ids[i].length === this.begin_index - 1) {
+                batch_logits_data.fill(-Infinity);
+                batch_logits_data[this.timestamp_begin] = 0;
+                continue;
+            }
+
+            // timestamps have to appear in pairs, except directly before eos_token; mask logits accordingly
+            const seq = input_ids[i].slice(this.begin_index);
+            const last_was_timestamp = seq.length >= 1 && seq[seq.length - 1] >= this.timestamp_begin;
+            const penultimate_was_timestamp = seq.length < 2 || seq[seq.length - 2] >= this.timestamp_begin;
+
+            if (last_was_timestamp) {
+                if (penultimate_was_timestamp) { // has to be non-timestamp
+                    batch_logits_data.subarray(this.timestamp_begin).fill(-Infinity);
+                } else { // cannot be normal text tokens
+                    batch_logits_data.subarray(0, this.eos_token_id).fill(-Infinity);
+                }
+            }
+
+            // apply the `max_initial_timestamp` option
+            if (input_ids[i].length === this.begin_index && this.max_initial_timestamp_index !== null) {
+                const last_allowed = this.timestamp_begin + this.max_initial_timestamp_index;
+                batch_logits_data.subarray(last_allowed + 1).fill(-Infinity);
+            }
+
+            // if sum of probability over timestamps is above any other token, sample timestamp
+            const logprobs = log_softmax(batch_logits_data);
+            const timestamp_logprob = Math.log(logprobs.subarray(this.timestamp_begin).map(Math.exp).reduce((a, b) => a + b));
+            const max_text_token_logprob = max(logprobs.subarray(0, this.timestamp_begin))[0];
+
+            if (timestamp_logprob > max_text_token_logprob) {
+                batch_logits_data.subarray(0, this.timestamp_begin).fill(-Infinity);
+            }
+        }
+
+        return logits;
+    }
+}
+
+/**
+ * A logits processor that disallows ngrams of a certain size to be repeated.
+ */
+export class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
+    /**
+     * Create a NoRepeatNGramLogitsProcessor.
+     * @param {number} no_repeat_ngram_size The no-repeat-ngram size. All ngrams of this size can only occur once.
+     */
+    constructor(no_repeat_ngram_size) {
+        super();
+        this.no_repeat_ngram_size = no_repeat_ngram_size;
+    }
+
+    /**
+     * Generate n-grams from a sequence of token ids.
+     * @param {bigint[]} prevInputIds List of previous input ids
+     * @returns {Map<string, number[]>} Map of generated n-grams
+     */
+    getNgrams(prevInputIds) {
+        const curLen = prevInputIds.length;
+
+        /**@type {number[][]} */
+        const ngrams = [];
+        for (let j = 0; j < curLen + 1 - this.no_repeat_ngram_size; ++j) {
+            const ngram = [];
+            for (let k = 0; k < this.no_repeat_ngram_size; ++k) {
+                ngram.push(prevInputIds[j + k]);
+            }
+            ngrams.push(ngram.map(Number));
+        }
+
+        /** @type {Map<string, number[]>} */
+        const generatedNgram = new Map();
+        for (const ngram of ngrams) {
+            const prevNgram = ngram.slice(0, ngram.length - 1);
+            const prevNgramKey = JSON.stringify(prevNgram);
+            const prevNgramValue = generatedNgram.get(prevNgramKey) ?? [];
+            prevNgramValue.push(ngram[ngram.length - 1]);
+            generatedNgram.set(prevNgramKey, prevNgramValue);
+        }
+        return generatedNgram;
+    }
+
+    /**
+     * Generate n-grams from a sequence of token ids.
+     * @param {Map<string, number[]>} bannedNgrams Map of banned n-grams
+     * @param {bigint[]} prevInputIds List of previous input ids
+     * @returns {number[]} Map of generated n-grams
+     */
+    getGeneratedNgrams(bannedNgrams, prevInputIds) {
+        const ngramIdx = prevInputIds.slice(prevInputIds.length + 1 - this.no_repeat_ngram_size, prevInputIds.length);
+        const banned = bannedNgrams.get(JSON.stringify(ngramIdx.map(Number))) ?? [];
+        return banned;
+    }
+
+    /**
+     * Calculate banned n-gram tokens
+     * @param {bigint[]} prevInputIds List of previous input ids
+     * @returns {number[]} Map of generated n-grams
+     */
+    calcBannedNgramTokens(prevInputIds) {
+        const bannedTokens = [];
+        if (prevInputIds.length + 1 < this.no_repeat_ngram_size) {
+            // return no banned tokens if we haven't generated no_repeat_ngram_size tokens yet
+            return bannedTokens;
+
+        } else {
+            const generatedNgrams = this.getNgrams(prevInputIds);
+            const bannedTokens = this.getGeneratedNgrams(generatedNgrams, prevInputIds);
+            return bannedTokens;
+        }
+    }
+
+    /**
+     * Apply the no-repeat-ngram processor to the logits.
+     * @param {bigint[][]} input_ids The input IDs.
+     * @param {Tensor} logits The logits.
+     * @returns {Object} The logits with no-repeat-ngram processing.
+     */
+    _call(input_ids, logits) {
+        for (let i = 0; i < input_ids.length; ++i) {
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+            const bannedTokens = this.calcBannedNgramTokens(input_ids[i]);
+            for (const token of bannedTokens) {
+                batch_logits_data[token] = -Infinity;
+            }
+        }
+        return logits;
+    }
+}
+
+/**
+ * A logits processor that penalises repeated output tokens.
+ */
+export class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
+    /**
+     * Create a RepetitionPenaltyLogitsProcessor.
+     * @param {number} penalty The penalty to apply for repeated tokens.
+     */
+    constructor(penalty) {
+        super();
+        this.penalty = penalty;
+    }
+
+    /**
+     * Apply the repetition penalty to the logits.
+     * @param {bigint[][]} input_ids The input IDs.
+     * @param {Tensor} logits The logits.
+     * @returns {Object} The logits with repetition penalty processing.
+     */
+    _call(input_ids, logits) {
+        // Modify the logits corresponding to each element in `input_ids`.
+        // As a consequence, the logits corresponding to tokens that appear
+        // many times in the output will be penalised more.
+
+        for (let i = 0; i < input_ids.length; ++i) {
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+            for (const input_id of input_ids[i]) {
+                const token = Number(input_id);
+                if (batch_logits_data[token] < 0) {
+                    batch_logits_data[token] *= this.penalty;
+                } else {
+                    batch_logits_data[token] /= this.penalty;
+                }
+            }
+        }
+
+        return logits
+    }
+}
+
+/**
+ * A logits processor that enforces a minimum number of tokens.
+ */
+export class MinLengthLogitsProcessor extends LogitsProcessor {
+    /**
+     * Create a MinLengthLogitsProcessor.
+     * @param {number} min_length The minimum length below which the score of `eos_token_id` is set to negative infinity.
+     * @param {number|number[]} eos_token_id The ID/IDs of the end-of-sequence token.
+     */
+    constructor(min_length, eos_token_id) {
+        super();
+        this.min_length = min_length;
+        this.eos_token_id = Array.isArray(eos_token_id) ? eos_token_id : [eos_token_id];
+    }
+
+    /**
+     * Apply logit processor.
+     * @param {bigint[][]} input_ids The input IDs.
+     * @param {Tensor} logits The logits.
+     * @returns {Object} The processed logits.
+     */
+    _call(input_ids, logits) {
+        for (let i = 0; i < input_ids.length; ++i) {
+            if (input_ids[i].length < this.min_length) {
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+
+                for (const eos_token of this.eos_token_id) {
+                    batch_logits_data[eos_token] = -Infinity;
+                }
+            }
+        }
+
+        return logits
+    }
+}
+
+/**
+ * A logits processor that enforces a minimum number of new tokens.
+ */
+export class MinNewTokensLengthLogitsProcessor extends LogitsProcessor {
+    /**
+     * Create a MinNewTokensLengthLogitsProcessor.
+     * @param {number} prompt_length_to_skip The input tokens length.
+     * @param {number} min_new_tokens The minimum *new* tokens length below which the score of `eos_token_id` is set to negative infinity.
+     * @param {number|number[]} eos_token_id The ID/IDs of the end-of-sequence token.
+     */
+    constructor(prompt_length_to_skip, min_new_tokens, eos_token_id) {
+        super();
+        this.prompt_length_to_skip = prompt_length_to_skip;
+        this.min_new_tokens = min_new_tokens;
+        this.eos_token_id = Array.isArray(eos_token_id) ? eos_token_id : [eos_token_id];
+    }
+
+    /**
+     * Apply logit processor.
+     * @param {bigint[][]} input_ids The input IDs.
+     * @param {Tensor} logits The logits.
+     * @returns {Object} The processed logits.
+     */
+    _call(input_ids, logits) {
+        for (let i = 0; i < input_ids.length; ++i) {
+            const new_tokens_length = input_ids[i].length - this.prompt_length_to_skip;
+            if (new_tokens_length < this.min_new_tokens) {
+                const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+
+                for (const eos_token of this.eos_token_id) {
+                    batch_logits_data[eos_token] = -Infinity;
+                }
+            }
+        }
+        return logits
+    }
+}
+
+export class NoBadWordsLogitsProcessor extends LogitsProcessor {
+    /**
+     * Create a `NoBadWordsLogitsProcessor`.
+     * @param {number[][]} bad_words_ids List of list of token ids that are not allowed to be generated.
+     * @param {number|number[]} eos_token_id The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
+     */
+    constructor(bad_words_ids, eos_token_id) {
+        super();
+        this.bad_words_ids = bad_words_ids;
+        this.eos_token_id = Array.isArray(eos_token_id) ? eos_token_id : [eos_token_id];
+    }
+
+    /**
+     * Apply logit processor.
+     * @param {bigint[][]} input_ids The input IDs.
+     * @param {Tensor} logits The logits.
+     * @returns {Object} The processed logits.
+     */
+    _call(input_ids, logits) {
+        for (let i = 0; i < input_ids.length; ++i) {
+            const batch_logits_data = /** @type {Float32Array} */(logits[i].data);
+            const ids = input_ids[i];
+            for (const bad_word_ids of this.bad_words_ids) {
+                // Whether to modify the logits of the last token in the bad word id sequence
+                let mark = true;
+
+                // For each bad word in the list, if the current sequence of input ids ends with this sequence (excluding the last),
+                // then we set the logits of the last bad word id to -Infinity.
+                for (let j = 1; j <= bad_word_ids.length - 1 && bad_word_ids.length < ids.length; ++j) {
+
+                    // NOTE: We use != instead of !== to compare bigint and number
+                    // @ts-ignore
+                    if (bad_word_ids.at(-j - 1) != ids.at(-j)) {
+                        // We have found a mismatch
+                        mark = false;
+                        break;
+                    }
+                }
+                if (mark) {
+                    batch_logits_data[bad_word_ids.at(-1)] = -Infinity;
+                }
+            }
+        }
+        return logits
+    }
+}
+
+/**
+ * [`LogitsProcessor`] for classifier free guidance (CFG). The scores are split over the batch dimension,
+ * where the first half correspond to the conditional logits (predicted from the input prompt) and the second half
+ * correspond to the unconditional logits (predicted from an empty or 'null' prompt). The processor computes a
+ * weighted average across the conditional and unconditional logits, parameterised by the `guidance_scale`.
+ * 
+ * See [the paper](https://arxiv.org/abs/2306.05284) for more information.
+ */
+export class ClassifierFreeGuidanceLogitsProcessor extends LogitsProcessor {
+
+    /**
+     * Create a `ClassifierFreeGuidanceLogitsProcessor`.
+     * @param {number} guidance_scale The guidance scale for classifier free guidance (CFG). CFG is enabled by setting `guidance_scale > 1`.
+     * Higher guidance scale encourages the model to generate samples that are more closely linked to the input
+     * prompt, usually at the expense of poorer quality.
+     */
+    constructor(guidance_scale) {
+        super();
+        if (guidance_scale <= 1) {
+            throw new Error(
+                `Require guidance scale >1 to use the classifier free guidance processor, got guidance scale ${guidance_scale}.`
+            )
+        }
+        this.guidance_scale = guidance_scale;
+    }
+
+    /**
+     * Apply logit processor.
+     * @param {bigint[][]} input_ids The input IDs.
+     * @param {Tensor} logits The logits.
+     * @returns {Object} The processed logits.
+     */
+    _call(input_ids, logits) {
+        if (logits.dims[0] !== 2 * input_ids.length) {
+            throw new Error(
+                `Logits should have twice the batch size of the input ids, the first half of batches corresponding to ` +
+                `the conditional inputs, and the second half of batches corresponding to the unconditional inputs. Got ` +
+                `batch size ${logits.dims[0]} for the logits and ${input_ids.length} for the input ids.`
+            )
+        }
+
+        const unguided_bsz = input_ids.length;
+        const cond_logits = logits.slice([0, unguided_bsz], null);
+        const uncond_logits = logits.slice([unguided_bsz, logits.dims[0]], null);
+
+        // Merge into uncond_logits (to save memory). This is equivalent to the following:
+        // scores = uncond_logits + (cond_logits - uncond_logits) * guidance_scale
+        for (let i = 0; i < uncond_logits.data.length; ++i) {
+            uncond_logits.data[i] += (cond_logits.data[i] - uncond_logits.data[i]) * this.guidance_scale;
+        }
+
+        return uncond_logits;
+    }
+}
+
+/**
+ * [`LogitsWarper`] for temperature (exponential scaling output probability distribution), which effectively means
+ * that it can control the randomness of the predicted tokens. Often used together with [`TopPLogitsWarper`] and [`TopKLogitsWarper`].
+ */
+export class TemperatureLogitsWarper extends LogitsWarper {
+    /**
+     * Create a `TemperatureLogitsWarper`.
+     * @param {number} temperature Strictly positive float value used to modulate the logits distribution.
+     * A value smaller than `1` decreases randomness (and vice versa), with `0` being equivalent to shifting
+     * all probability mass to the most likely token.
+     */
+    constructor(temperature) {
+        super();
+
+        if (typeof temperature !== 'number' || temperature <= 0) {
+            let errorMessage =
+                `\`temperature\` (=${temperature}) must be a strictly positive float, otherwise your next token scores will be invalid.`;
+
+            if (temperature === 0) {
+                errorMessage += " If you're looking for greedy decoding strategies, set `do_sample=false`."
+            }
+        }
+        this.temperature = temperature;
+    }
+
+    /**
+     * Apply logit warper.
+     * @param {bigint[][]} input_ids The input IDs.
+     * @param {Tensor} logits The logits.
+     * @returns {Object} The processed logits.
+     */
+    _call(input_ids, logits) {
+        const batch_logits_data = /** @type {Float32Array} */(logits.data);
+        for (let i = 0; i < batch_logits_data.length; ++i) {
+            batch_logits_data[i] /= this.temperature;
+        }
+        return logits;
+    }
+}
+
+/**
+ * [`LogitsWarper`] that performs top-p, i.e. restricting to top tokens summing to prob_cut_off <= prob_cut_off.
+ * Often used together with [`TemperatureLogitsWarper`] and [`TopKLogitsWarper`].
+ */
+export class TopPLogitsWarper extends LogitsWarper {
+    /**
+     * Create a `TopPLogitsWarper`.
+     * @param {number} top_p If set to < 1, only the smallest set of most probable tokens with
+     * probabilities that add up to `top_p` or higher are kept for generation.
+     * @param {Object} options Additional options for the top-p sampling.
+     * @param {number} [options.filter_value=-Infinity] All filtered values will be set to this float value.
+     * @param {number} [options.min_tokens_to_keep=1] Minimum number of tokens that cannot be filtered.
+     */
+    constructor(top_p, {
+        filter_value = -Infinity,
+        min_tokens_to_keep = 1,
+    } = {}) {
+        super();
+        if (top_p < 0 || top_p > 1.0) {
+            throw new Error(`\`top_p\` must be a float > 0 and < 1, but is ${top_p}`)
+        }
+        if (!Number.isInteger(min_tokens_to_keep) || min_tokens_to_keep < 1) {
+            throw new Error(`\`min_tokens_to_keep\` must be a positive integer, but is ${min_tokens_to_keep}`)
+        }
+
+        this.top_p = top_p
+        this.filter_value = filter_value
+        this.min_tokens_to_keep = min_tokens_to_keep
+    }
+}
+
+/**
+ * [`LogitsWarper`] that performs top-k, i.e. restricting to the k highest probability elements.
+ * Often used together with [`TemperatureLogitsWarper`] and [`TopPLogitsWarper`].
+ */
+export class TopKLogitsWarper extends LogitsWarper {
+    /**
+     * Create a `TopKLogitsWarper`.
+     * @param {number} top_k If set to > 0, only the top `top_k` tokens are kept for generation.
+     * @param {Object} options Additional options for the top-k sampling.
+     * @param {number} [options.filter_value=-Infinity] All filtered values will be set to this float value.
+     * @param {number} [options.min_tokens_to_keep=1] Minimum number of tokens that cannot be filtered.
+     */
+    constructor(top_k, {
+        filter_value = -Infinity,
+        min_tokens_to_keep = 1,
+    } = {}) {
+        super();
+        if (!Number.isInteger(top_k) || top_k < 0) {
+            throw new Error(`\`top_k\` must be a positive integer, but is ${top_k}`)
+        }
+
+        this.top_k = Math.max(top_k, min_tokens_to_keep)
+        this.filter_value = filter_value
+    }
+}
\ No newline at end of file
diff --git a/src/generation/logits_sampler.js b/src/generation/logits_sampler.js
new file mode 100644
index 000000000..46b74e081
--- /dev/null
+++ b/src/generation/logits_sampler.js
@@ -0,0 +1,204 @@
+
+/**
+ * @module generation/logits_sampler
+ */
+
+import { Callable } from "../utils/generic.js";
+import { Tensor, topk } from "../utils/tensor.js";
+
+import {
+    max,
+    softmax,
+} from '../utils/maths.js';
+import { GenerationConfig } from '../generation/configuration_utils.js';
+
+/**
+ * Sampler is a base class for all sampling methods used for text generation.
+ */
+export class LogitsSampler extends Callable {
+    /**
+     * Creates a new Sampler object with the specified generation config.
+     * @param {GenerationConfig} generation_config The generation config.
+     */
+    constructor(generation_config) {
+        super();
+        this.generation_config = generation_config;
+    }
+
+    /**
+     * Executes the sampler, using the specified logits.
+     * @param {Tensor} logits
+     * @returns {Promise<[bigint, number][]>}
+     */
+    async _call(logits) {
+        // Sample from logits, of dims [batch, sequence_length, vocab_size].
+        // If index is specified, sample from [batch, index, vocab_size].
+        return this.sample(logits);
+    }
+
+    /**
+     * Abstract method for sampling the logits.
+     * @param {Tensor} logits
+     * @throws {Error} If not implemented in subclass.
+     * @returns {Promise<[bigint, number][]>}
+     */
+    async sample(logits) {
+        throw Error("sample should be implemented in subclasses.")
+    }
+
+    /**
+     * Returns the specified logits as an array, with temperature applied.
+     * @param {Tensor} logits
+     * @param {number} index
+     * @returns {Float32Array}
+     */
+    getLogits(logits, index) {
+        let vocabSize = logits.dims.at(-1);
+
+        let logs = /** @type {Float32Array} */(logits.data);
+
+        if (index === -1) {
+            logs = logs.slice(-vocabSize);
+        } else {
+            let startIndex = index * vocabSize;
+            logs = logs.slice(startIndex, startIndex + vocabSize);
+        }
+        return logs;
+    }
+
+    /**
+     * Selects an item randomly based on the specified probabilities.
+     * @param {import("../transformers.js").DataArray} probabilities An array of probabilities to use for selection.
+     * @returns {number} The index of the selected item.
+     */
+    randomSelect(probabilities) {
+        // Return index of chosen item
+        let sumProbabilities = 0;
+        for (let i = 0; i < probabilities.length; ++i) {
+            sumProbabilities += probabilities[i];
+        }
+
+        let r = Math.random() * sumProbabilities;
+        for (let i = 0; i < probabilities.length; ++i) {
+            r -= probabilities[i];
+            if (r <= 0) {
+                return i;
+            }
+        }
+        return 0; // return first (most probable) as a fallback
+    }
+
+    /**
+     * Returns a Sampler object based on the specified options.
+     * @param {GenerationConfig} generation_config An object containing options for the sampler.
+     * @returns {LogitsSampler} A Sampler object.
+     */
+    static getSampler(generation_config) {
+        // - *greedy decoding*: `num_beams=1` and `do_sample=False`
+        // - *contrastive search*: `penalty_alpha>0` and `top_k>1`
+        // - *multinomial sampling*: `num_beams=1` and `do_sample=True`
+        // - *beam-search decoding*: `num_beams>1` and `do_sample=False`
+        // - *beam-search multinomial sampling*: `num_beams>1` and `do_sample=True`
+        // - *diverse beam-search decoding*: `num_beams>1` and `num_beam_groups>1`
+        // - *constrained beam-search decoding*: `constraints!=None` or `force_words_ids!=None`
+
+        // NOTE: beam search is implemented directly into the generation function
+        if (generation_config.do_sample) {
+            return new MultinomialSampler(generation_config);
+
+        } else if (generation_config.num_beams > 1) {
+            return new BeamSearchSampler(generation_config);
+
+        } else {
+            if (generation_config.num_return_sequences > 1) {
+                throw Error(`num_return_sequences has to be 1 when doing greedy search, but is ${generation_config.num_return_sequences}.`)
+            }
+            return new GreedySampler(generation_config);
+        }
+    }
+}
+
+/**
+ * Class representing a Greedy Sampler.
+ */
+class GreedySampler extends LogitsSampler {
+    /**
+     * Sample the maximum probability of a given logits tensor.
+     * @param {Tensor} logits
+     * @returns {Promise<[bigint, number][]>} An array with a single tuple, containing the index of the maximum value and a meaningless score (since this is a greedy search).
+     */
+    async sample(logits) {
+        // NOTE: no need to do log_softmax here since we only take the maximum
+        const argmax = max(logits.data)[1];
+
+        // Note: score is meaningless in this context, since we are performing
+        // greedy search (p = 1 => log(p) = 0)
+        return [
+            [BigInt(argmax), 0]
+        ];
+    }
+}
+
+/**
+ * Class representing a MultinomialSampler.
+ */
+class MultinomialSampler extends LogitsSampler {
+
+    /**
+     * Sample from the logits.
+     * @param {Tensor} logits
+     * @returns {Promise<[bigint, number][]>}
+     */
+    async sample(logits) {
+        let k = logits.dims.at(-1); // defaults to vocab size
+        if (this.generation_config.top_k > 0) {
+            k = Math.min(this.generation_config.top_k, k);
+        }
+
+        // Get top k tokens
+        const [v, i] = await topk(logits, k);
+
+        // Compute softmax over logits
+        const probabilities = softmax(/** @type {Float32Array} */(v.data));
+
+        return Array.from({ length: this.generation_config.num_beams }, () => {
+            const sampledIndex = this.randomSelect(probabilities);
+            return [
+                i.data[sampledIndex], // token id
+                Math.log(probabilities[sampledIndex]), // score
+            ];
+        });
+    }
+}
+
+
+/**
+ * Class representing a BeamSearchSampler.
+ */
+class BeamSearchSampler extends LogitsSampler {
+
+    /**
+     * Sample from the logits.
+     * @param {Tensor} logits
+     * @returns {Promise<[bigint, number][]>}
+     */
+    async sample(logits) {
+        let k = logits.dims.at(-1); // defaults to vocab size
+        if (this.generation_config.top_k > 0) {
+            k = Math.min(this.generation_config.top_k, k);
+        }
+
+        // Get top k tokens
+        const [v, i] = await topk(logits, k);
+
+        // Compute softmax over logits
+        const probabilities = softmax(/** @type {Float32Array} */(v.data));
+
+        return Array.from({ length: this.generation_config.num_beams }, (_, x) => {
+            return [
+                i.data[x], // token id
+                Math.log(probabilities[x]), // score
+            ];
+        });
+    }
+}
diff --git a/src/generation/parameters.js b/src/generation/parameters.js
new file mode 100644
index 000000000..1e2f2def3
--- /dev/null
+++ b/src/generation/parameters.js
@@ -0,0 +1,35 @@
+
+/**
+ * @module generation/parameters
+ */
+
+/**
+ * @typedef {Object} GenerationFunctionParameters
+ * @property {import('../utils/tensor.js').Tensor} [inputs=null] (`Tensor` of varying shape depending on the modality, *optional*):
+ * The sequence used as a prompt for the generation or as model inputs to the encoder. If `null` the
+ * method initializes it with `bos_token_id` and a batch size of 1. For decoder-only models `inputs`
+ * should be in the format of `input_ids`. For encoder-decoder models *inputs* can represent any of
+ * `input_ids`, `input_values`, `input_features`, or `pixel_values`.
+ * @property {import('./configuration_utils.js').GenerationConfig} [generation_config=null] (`GenerationConfig`, *optional*):
+ * The generation configuration to be used as base parametrization for the generation call.
+ * `**kwargs` passed to generate matching the attributes of `generation_config` will override them.
+ * If `generation_config` is not provided, the default will be used, which has the following loading
+ * priority:
+ * - (1) from the `generation_config.json` model file, if it exists;
+ * - (2) from the model configuration. Please note that unspecified parameters will inherit [`GenerationConfig`]'s
+ * default values, whose documentation should be checked to parameterize generation.
+ * @property {import('./logits_process.js').LogitsProcessorList} [logits_processor=null] (`LogitsProcessorList`, *optional*):
+ * Custom logits processors that complement the default logits processors built from arguments and
+ * generation config. If a logit processor is passed that is already created with the arguments or a
+ * generation config an error is thrown. This feature is intended for advanced users.
+ * @property {import('./stopping_criteria.js').StoppingCriteriaList} [stopping_criteria=null] (`StoppingCriteriaList`, *optional*):
+ * Custom stopping criteria that complements the default stopping criteria built from arguments and a
+ * generation config. If a stopping criteria is passed that is already created with the arguments or a
+ * generation config an error is thrown. This feature is intended for advanced users.
+ * @property {import('./streamers.js').BaseStreamer} [streamer=null] (`BaseStreamer`, *optional*):
+ * Streamer object that will be used to stream the generated sequences. Generated tokens are passed
+ * through `streamer.put(token_ids)` and the streamer is responsible for any further processing.
+ * @property {number[]} [decoder_input_ids=null] (`number[]`, *optional*):
+ * If the model is an encoder-decoder model, this argument is used to pass the `decoder_input_ids`.
+ * @param {any} [kwargs] (`Dict[str, any]`, *optional*):
+ */
diff --git a/src/generation/stopping_criteria.js b/src/generation/stopping_criteria.js
new file mode 100644
index 000000000..08434f2b4
--- /dev/null
+++ b/src/generation/stopping_criteria.js
@@ -0,0 +1,156 @@
+
+/**
+ * @module generation/stopping_criteria
+ */
+
+import { Callable } from "../utils/generic.js";
+
+// NOTE:
+// Stopping Criteria returns a list of `batch_size` booleans, indicating whether each sequence in the batch should be stopped.
+
+/**
+ * Abstract base class for all stopping criteria that can be applied during generation.
+ */
+export class StoppingCriteria extends Callable {
+    /**
+     * 
+     * @param {number[][]} input_ids (`number[][]` of shape `(batch_size, sequence_length)`):
+     * Indices of input sequence tokens in the vocabulary.
+     * @param {number[][]} scores scores (`number[][]` of shape `(batch_size, config.vocab_size)`):
+     * Prediction scores of a language modeling head. These can be scores for each vocabulary token before SoftMax
+     * or scores for each vocabulary token after SoftMax.
+     * @returns {boolean[]} A list of booleans indicating whether each sequence should be stopped.
+     */
+    _call(input_ids, scores) {
+        throw Error("StoppingCriteria needs to be subclassed");
+    }
+}
+/**
+ */
+export class StoppingCriteriaList extends Callable {
+    /**
+     * Constructs a new instance of `StoppingCriteriaList`.
+     */
+    constructor() {
+        super();
+        this.criteria = [];
+    }
+
+    /**
+     * Adds a new stopping criterion to the list.
+     *
+     * @param {StoppingCriteria} item The stopping criterion to add.
+     */
+    push(item) {
+        this.criteria.push(item);
+    }
+
+    /**
+     * Adds multiple stopping criteria to the list.
+     *
+     * @param {StoppingCriteria|StoppingCriteriaList|StoppingCriteria[]} items The stopping criteria to add.
+     */
+    extend(items) {
+        if (items instanceof StoppingCriteriaList) {
+            items = items.criteria;
+        } else if (items instanceof StoppingCriteria) {
+            items = [items];
+        }
+        this.criteria.push(...items);
+    }
+
+    _call(input_ids, scores) {
+        const is_done = new Array(input_ids.length).fill(false);
+        for (const criterion of this.criteria) {
+            const criterion_done = criterion(input_ids, scores);
+            for (let i = 0; i < is_done.length; ++i) {
+                is_done[i] ||= criterion_done[i];
+            }
+        }
+        return is_done;
+    }
+
+    [Symbol.iterator]() {
+        return this.criteria.values();
+    }
+}
+
+/**
+ * This class can be used to stop generation whenever the full generated number of tokens exceeds `max_length`.
+ * Keep in mind for decoder-only type of transformers, this will include the initial prompted tokens.
+ */
+export class MaxLengthCriteria extends StoppingCriteria {
+
+    /**
+     * 
+     * @param {number} max_length The maximum length that the output sequence can have in number of tokens.
+     * @param {number} [max_position_embeddings=null] The maximum model length, as defined by the model's `config.max_position_embeddings` attribute.
+     */
+    constructor(max_length, max_position_embeddings = null) {
+        super();
+        this.max_length = max_length;
+        this.max_position_embeddings = max_position_embeddings;
+    }
+
+    _call(input_ids) {
+        return input_ids.map(ids => ids.length >= this.max_length);
+    }
+}
+
+// TODO: add MaxTimeCriteria
+
+/**
+ * This class can be used to stop generation whenever the "end-of-sequence" token is generated.
+ * By default, it uses the `model.generation_config.eos_token_id`.
+ */
+export class EosTokenCriteria extends StoppingCriteria {
+
+    /**
+     * 
+     * @param {number|number[]} eos_token_id The id of the *end-of-sequence* token.
+     * Optionally, use a list to set multiple *end-of-sequence* tokens.
+     */
+    constructor(eos_token_id) {
+        super();
+        if (!Array.isArray(eos_token_id)) {
+            eos_token_id = [eos_token_id];
+        }
+        this.eos_token_id = eos_token_id;
+    }
+
+    /**
+     * 
+     * @param {number[][]} input_ids 
+     * @param {number[][]} scores 
+     * @returns {boolean[]}
+     */
+    _call(input_ids, scores) {
+        return input_ids.map(ids => {
+            const last = ids.at(-1);
+            // NOTE: We use == instead of === to allow for number/bigint comparison
+            return this.eos_token_id.some(eos_id => last == eos_id);
+        });
+    }
+}
+
+/**
+ * This class can be used to stop generation whenever the user interrupts the process.
+ */
+export class InterruptableStoppingCriteria extends StoppingCriteria {
+    constructor() {
+        super();
+        this.interrupted = false;
+    }
+
+    interrupt() {
+        this.interrupted = true;
+    }
+
+    reset() {
+        this.interrupted = false;
+    }
+
+    _call(input_ids, scores) {
+        return new Array(input_ids.length).fill(this.interrupted);
+    }
+}
diff --git a/src/generation/streamers.js b/src/generation/streamers.js
new file mode 100644
index 000000000..64afc71c7
--- /dev/null
+++ b/src/generation/streamers.js
@@ -0,0 +1,212 @@
+
+/**
+ * @module generation/streamers
+ */
+
+import { mergeArrays } from '../utils/core.js';
+import { is_chinese_char } from '../tokenizers.js';
+import { apis } from '../env.js';
+
+export class BaseStreamer {
+    /**
+     * Function that is called by `.generate()` to push new tokens
+     * @param {bigint[][]} value 
+     */
+    put(value) {
+        throw Error('Not implemented');
+    }
+
+    /**
+     * Function that is called by `.generate()` to signal the end of generation
+     */
+    end() {
+        throw Error('Not implemented');
+    }
+}
+
+const stdout_write = apis.IS_PROCESS_AVAILABLE
+    ? x => process.stdout.write(x)
+    : x => console.log(x);
+
+/**
+ * Simple text streamer that prints the token(s) to stdout as soon as entire words are formed.
+ */
+export class TextStreamer extends BaseStreamer {
+    /**
+     * 
+     * @param {import('../tokenizers.js').PreTrainedTokenizer} tokenizer 
+     */
+    constructor(tokenizer, {
+        skip_prompt = false,
+        callback_function = null,
+        token_callback_function = null,
+        decode_kwargs = {},
+        ...kwargs
+    } = {}) {
+        super();
+        this.tokenizer = tokenizer;
+        this.skip_prompt = skip_prompt;
+        this.callback_function = callback_function ?? stdout_write;
+        this.token_callback_function = token_callback_function;
+        this.decode_kwargs = { ...decode_kwargs, ...kwargs };
+
+        // variables used in the streaming process
+        this.token_cache = [];
+        this.print_len = 0;
+        this.next_tokens_are_prompt = true;
+    }
+
+    /**
+     * Receives tokens, decodes them, and prints them to stdout as soon as they form entire words.
+     * @param {bigint[][]} value 
+     */
+    put(value) {
+        if (value.length > 1) {
+            throw Error('TextStreamer only supports batch size of 1');
+        }
+
+        if (this.skip_prompt && this.next_tokens_are_prompt) {
+            this.next_tokens_are_prompt = false;
+            return;
+        }
+
+        const tokens = value[0];
+        this.token_callback_function?.(tokens)
+
+        // Add the new token to the cache and decodes the entire thing.
+        this.token_cache = mergeArrays(this.token_cache, tokens);
+        const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);
+
+        let printable_text;
+        if (text.endsWith('\n')) {
+            // After the symbol for a new line, we flush the cache.
+            printable_text = text.slice(this.print_len);
+            this.token_cache = [];
+            this.print_len = 0;
+        } else if (text.length > 0 && is_chinese_char(text.charCodeAt(text.length - 1))) {
+            // If the last token is a CJK character, we print the characters.
+            printable_text = text.slice(this.print_len);
+            this.print_len += printable_text.length;
+        } else {
+            // Otherwise, prints until the last space char (simple heuristic to avoid printing incomplete words,
+            // which may change with the subsequent token -- there are probably smarter ways to do this!)
+            printable_text = text.slice(this.print_len, text.lastIndexOf(' ') + 1);
+            this.print_len += printable_text.length;
+        }
+
+        this.on_finalized_text(printable_text, false);
+    }
+
+    /**
+     * Flushes any remaining cache and prints a newline to stdout.
+     */
+    end() {
+        let printable_text;
+        if (this.token_cache.length > 0) {
+            const text = this.tokenizer.decode(this.token_cache, this.decode_kwargs);
+            printable_text = text.slice(this.print_len);
+            this.token_cache = [];
+            this.print_len = 0;
+        } else {
+            printable_text = '';
+        }
+        this.next_tokens_are_prompt = true;
+        this.on_finalized_text(printable_text, true);
+    }
+
+    /**
+     * Prints the new text to stdout. If the stream is ending, also prints a newline.
+     * @param {string} text 
+     * @param {boolean} stream_end 
+     */
+    on_finalized_text(text, stream_end) {
+        if (text.length > 0) {
+            this.callback_function?.(text);
+        }
+        if (stream_end && this.callback_function === stdout_write && apis.IS_PROCESS_AVAILABLE) {
+            this.callback_function?.('\n');
+        }
+    }
+}
+
+/**
+ * Utility class to handle streaming of tokens generated by whisper speech-to-text models.
+ * Callback functions are invoked when each of the following events occur:
+ *  - A new chunk starts (on_chunk_start)
+ *  - A new token is generated (callback_function)
+ *  - A chunk ends (on_chunk_end)
+ *  - The stream is finalized (on_finalize)
+ */
+export class WhisperTextStreamer extends TextStreamer {
+    /**
+     * @param {import('../tokenizers.js').WhisperTokenizer} tokenizer
+     * @param {Object} options
+     * @param {boolean} [options.skip_prompt=false] Whether to skip the prompt tokens
+     * @param {function(string): void} [options.callback_function=null] Function to call when a piece of text is ready to display
+     * @param {function(string): void} [options.token_callback_function=null] Function to call when a new token is generated
+     * @param {function(number): void} [options.on_chunk_start=null] Function to call when a new chunk starts
+     * @param {function(number): void} [options.on_chunk_end=null] Function to call when a chunk ends
+     * @param {function(): void} [options.on_finalize=null] Function to call when the stream is finalized
+     * @param {number} [options.time_precision=0.02] Precision of the timestamps
+     * @param {boolean} [options.skip_special_tokens=true] Whether to skip special tokens when decoding
+     * @param {Object} [options.decode_kwargs={}] Additional keyword arguments to pass to the tokenizer's decode method
+     */
+    constructor(tokenizer, {
+        skip_prompt = false,
+        callback_function = null,
+        token_callback_function = null,
+        on_chunk_start = null,
+        on_chunk_end = null,
+        on_finalize = null,
+        time_precision = 0.02,
+        skip_special_tokens = true,
+        decode_kwargs = {},
+    } = {}) {
+        super(tokenizer, {
+            skip_prompt,
+            callback_function,
+            token_callback_function,
+            decode_kwargs: { skip_special_tokens, ...decode_kwargs },
+        });
+        this.timestamp_begin = tokenizer.timestamp_begin;
+
+        this.on_chunk_start = on_chunk_start;
+        this.on_chunk_end = on_chunk_end;
+        this.on_finalize = on_finalize;
+
+        this.time_precision = time_precision;
+
+        this.waiting_for_timestamp = false;
+    }
+
+    /**
+     * @param {bigint[][]} value 
+     */
+    put(value) {
+        if (value.length > 1) {
+            throw Error('WhisperTextStreamer only supports batch size of 1');
+        }
+        const tokens = value[0];
+
+        // Check if the token is a timestamp
+        if (tokens.length === 1) {
+            const offset = Number(tokens[0]) - this.timestamp_begin;
+            if (offset >= 0) {
+                const time = offset * this.time_precision;
+                if (this.waiting_for_timestamp) {
+                    this.on_chunk_end?.(time);
+                } else {
+                    this.on_chunk_start?.(time);
+                }
+                this.waiting_for_timestamp = !this.waiting_for_timestamp; // Toggle
+                value = [[]]; // Skip timestamp
+            }
+        }
+        return super.put(value);
+    }
+
+    end() {
+        super.end();
+        this.on_finalize?.();
+    }
+}
diff --git a/src/models.js b/src/models.js
index b6b1c71b1..b7d2b0ee2 100644
--- a/src/models.js
+++ b/src/models.js
@@ -5,11 +5,11 @@
  * **Example:** Load and run an `AutoModel`.
  * 
  * ```javascript
- * import { AutoModel, AutoTokenizer } from '@xenova/transformers';
- *
+ * import { AutoModel, AutoTokenizer } from '@huggingface/transformers';
+ * 
  * let tokenizer = await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased');
  * let model = await AutoModel.from_pretrained('Xenova/bert-base-uncased');
- *
+ * 
  * let inputs = await tokenizer('I love transformers!');
  * let { logits } = await model(inputs);
  * // Tensor {
@@ -24,11 +24,11 @@
  * 
  * **Example:** Load and run an `AutoModelForSeq2SeqLM`.
  * ```javascript
- * import { AutoModelForSeq2SeqLM, AutoTokenizer } from '@xenova/transformers';
+ * import { AutoModelForSeq2SeqLM, AutoTokenizer } from '@huggingface/transformers';
  * 
  * let tokenizer = await AutoTokenizer.from_pretrained('Xenova/t5-small');
  * let model = await AutoModelForSeq2SeqLM.from_pretrained('Xenova/t5-small');
- *
+ * 
  * let { input_ids } = await tokenizer('translate English to German: I love transformers!');
  * let outputs = await model.generate(input_ids);
  * let decoded = tokenizer.decode(outputs[0], { skip_special_tokens: true });
@@ -40,13 +40,30 @@
 
 import {
     AutoConfig,
+    getKeyValueShapes,
 } from './configs.js';
 
+import {
+    deviceToExecutionProviders,
+    createInferenceSession,
+    isONNXTensor,
+    isONNXProxy,
+} from './backends/onnx.js';
+import {
+    DATA_TYPES,
+    DEFAULT_DEVICE_DTYPE_MAPPING,
+    DEFAULT_DTYPE_SUFFIX_MAPPING,
+    isWebGpuFp16Supported,
+} from './utils/dtypes.js';
+
 import {
     Callable,
+} from './utils/generic.js';
+
+import {
     isIntegralNumber,
-    isTypedArray,
     mergeArrays,
+    pick,
 } from './utils/core.js';
 
 import {
@@ -54,10 +71,12 @@ import {
     getModelJSON,
 } from './utils/hub.js';
 
+import {
+    GITHUB_ISSUE_URL,
+} from './utils/constants.js';
+
 import {
     LogitsProcessorList,
-    GenerationConfig,
-    ForceTokensLogitsProcessor,
     ForcedBOSTokenLogitsProcessor,
     ForcedEOSTokenLogitsProcessor,
     SuppressTokensAtBeginLogitsProcessor,
@@ -68,24 +87,35 @@ import {
     MinLengthLogitsProcessor,
     MinNewTokensLengthLogitsProcessor,
 
-    Sampler,
-} from './utils/generation.js';
+    TemperatureLogitsWarper,
+    TopKLogitsWarper,
+    TopPLogitsWarper,
+    ClassifierFreeGuidanceLogitsProcessor,
+} from './generation/logits_process.js';
+
+import {
+    GenerationConfig,
+} from './generation/configuration_utils.js';
 
 import {
     cat,
-    dynamicTimeWarping,
+    full_like,
     mean,
+    ones,
     ones_like,
     stack,
     std_mean,
     Tensor,
+    zeros_like,
 } from './utils/tensor.js';
 
-import { executionProviders, ONNX } from './backends/onnx.js';
-import { medianFilter } from './transformers.js';
-const { InferenceSession, Tensor: ONNXTensor, env } = ONNX;
+import { dynamic_time_warping, medianFilter } from './utils/maths.js';
+import { EosTokenCriteria, MaxLengthCriteria, StoppingCriteriaList } from './generation/stopping_criteria.js';
+import { LogitsSampler } from './generation/logits_sampler.js';
+import { apis } from './env.js';
 
-/** @typedef {import('onnxruntime-web').InferenceSession} InferenceSession */
+import { WhisperGenerationConfig } from './models/whisper/generation_whisper.js';
+import { whisper_language_to_code } from './models/whisper/common_whisper.js';
 
 //////////////////////////////////////////////////
 // Model types: used internally
@@ -96,6 +126,8 @@ const MODEL_TYPES = {
     Vision2Seq: 3,
     DecoderOnly: 4,
     MaskGeneration: 5,
+    ImageTextToText: 6,
+    Musicgen: 7,
 }
 //////////////////////////////////////////////////
 
@@ -113,40 +145,183 @@ const MODEL_CLASS_TO_NAME_MAPPING = new Map();
  * Constructs an InferenceSession using a model file located at the specified path.
  * @param {string} pretrained_model_name_or_path The path to the directory containing the model file.
  * @param {string} fileName The name of the model file.
- * @param {import('./utils/hub.js').PretrainedOptions} options Additional options for loading the model.
- * @returns {Promise<InferenceSession>} A Promise that resolves to an InferenceSession object.
+ * @param {import('./utils/hub.js').PretrainedModelOptions} options Additional options for loading the model.
+ * @returns {Promise<{buffer: Uint8Array, session_options: Object, session_config: Object}>} A Promise that resolves to the data needed to create an InferenceSession object.
  * @private
  */
-async function constructSession(pretrained_model_name_or_path, fileName, options) {
-    // TODO add option for user to force specify their desired execution provider
-    let modelFileName = `onnx/${fileName}${options.quantized ? '_quantized' : ''}.onnx`;
-    let buffer = await getModelFile(pretrained_model_name_or_path, modelFileName, true, options);
+async function getSession(pretrained_model_name_or_path, fileName, options) {
+    const custom_config = options.config?.['transformers.js_config'] ?? {};
+    let device = options.device ?? custom_config.device;
+    if (device && typeof device !== 'string') {
+        if (device.hasOwnProperty(fileName)) {
+            device = device[fileName];
+        } else {
+            console.warn(`device not specified for "${fileName}". Using the default device.`);
+            device = null;
+        }
+    }
 
-    try {
-        return await InferenceSession.create(buffer, {
-            executionProviders,
-        });
-    } catch (err) {
-        // If the execution provided was only wasm, throw the error
-        if (executionProviders.length === 1 && executionProviders[0] === 'wasm') {
-            throw err;
+    // If the device is not specified, we use the default (supported) execution providers.
+    const selectedDevice = /** @type {import("./utils/devices.js").DeviceType} */(
+        device ?? (apis.IS_NODE_ENV ? 'cpu' : 'wasm')
+    );
+    const executionProviders = deviceToExecutionProviders(selectedDevice);
+
+    // If options.dtype is specified, we use it to choose the suffix for the model file.
+    // Otherwise, we use the default dtype for the device.
+    let dtype = options.dtype ?? custom_config.dtype;
+    if (typeof dtype !== 'string') {
+        if (dtype && dtype.hasOwnProperty(fileName)) {
+            dtype = dtype[fileName];
+        } else {
+            dtype = DEFAULT_DEVICE_DTYPE_MAPPING[selectedDevice] ?? DATA_TYPES.fp32;
+            console.warn(`dtype not specified for "${fileName}". Using the default dtype (${dtype}) for this device (${selectedDevice}).`);
         }
+    }
+
+    const selectedDtype = /** @type {import("./utils/dtypes.js").DataType} */(dtype);
+
+    if (!DEFAULT_DTYPE_SUFFIX_MAPPING.hasOwnProperty(selectedDtype)) {
+        throw new Error(`Invalid dtype: ${selectedDtype}. Should be one of: ${Object.keys(DATA_TYPES).join(', ')}`);
+    } else if (selectedDtype === DATA_TYPES.fp16 && selectedDevice === 'webgpu' && !(await isWebGpuFp16Supported())) {
+        throw new Error(`The device (${selectedDevice}) does not support fp16.`);
+    }
+
+    // Only valid for models with a decoder
+    const kv_cache_dtype = custom_config.kv_cache_dtype
+        ? (typeof custom_config.kv_cache_dtype === 'string'
+            ? custom_config.kv_cache_dtype
+            : custom_config.kv_cache_dtype[selectedDtype] ?? 'float32')
+        : undefined;
+
+    if (kv_cache_dtype && !['float32', 'float16'].includes(kv_cache_dtype)) {
+        throw new Error(`Invalid kv_cache_dtype: ${kv_cache_dtype}. Should be one of: float32, float16`);
+    }
+
+    const session_config = {
+        dtype: selectedDtype,
+        kv_cache_dtype,
+    }
+
+    // Construct the model file name
+    const suffix = DEFAULT_DTYPE_SUFFIX_MAPPING[selectedDtype];
+    const modelFileName = `${options.subfolder ?? ''}/${fileName}${suffix}.onnx`;
+
+    const session_options = { ...options.session_options };
+
+    // Overwrite `executionProviders` if not specified
+    session_options.executionProviders ??= executionProviders;
 
-        console.warn(err);
+    // Overwrite `freeDimensionOverrides` if specified in config and not set in session options
+    const free_dimension_overrides = custom_config.free_dimension_overrides;
+    if (free_dimension_overrides) {
+        session_options.freeDimensionOverrides ??= free_dimension_overrides;
+    } else if (selectedDevice.startsWith('webnn') && !session_options.freeDimensionOverrides) {
         console.warn(
-            'Something went wrong during model construction (most likely a missing operation). ' +
-            'Using `wasm` as a fallback. '
+            'WebNN does not currently support dynamic shapes and requires `free_dimension_overrides` to be set in config.json as a field within "transformers.js_config". ' +
+            'When `free_dimension_overrides` is not set, you may experience significant performance degradation.'
+        );
+    }
+
+    const bufferPromise = getModelFile(pretrained_model_name_or_path, modelFileName, true, options);
+
+    // handle onnx external data files
+    const use_external_data_format = options.use_external_data_format ?? custom_config.use_external_data_format;
+    /** @type {Promise<{path: string, data: Uint8Array}>[]} */
+    let externalDataPromises = [];
+    if (use_external_data_format && (
+        use_external_data_format === true ||
+        (
+            typeof use_external_data_format === 'object' &&
+            use_external_data_format.hasOwnProperty(fileName) &&
+            use_external_data_format[fileName] === true
         )
-        return await InferenceSession.create(buffer, {
-            executionProviders: ['wasm']
+    )) {
+        if (apis.IS_NODE_ENV) {
+            throw new Error('External data format is not yet supported in Node.js');
+        }
+        const path = `${fileName}${suffix}.onnx_data`;
+        const fullPath = `${options.subfolder ?? ''}/${path}`;
+        externalDataPromises.push(new Promise(async (resolve, reject) => {
+            const data = await getModelFile(pretrained_model_name_or_path, fullPath, true, options);
+            resolve({ path, data })
+        }));
+
+    } else if (session_options.externalData !== undefined) {
+        externalDataPromises = session_options.externalData.map(async (ext) => {
+            // if the external data is a string, fetch the file and replace the string with its content
+            if (typeof ext.data === "string") {
+                const ext_buffer = await getModelFile(pretrained_model_name_or_path, ext.data, true, options);
+                return { ...ext, data: ext_buffer };
+            }
+            return ext;
         });
     }
+
+    if (externalDataPromises.length > 0) {
+        session_options.externalData = await Promise.all(externalDataPromises);
+    }
+
+    if (selectedDevice === 'webgpu') {
+        const shapes = getKeyValueShapes(options.config, {
+            prefix: 'present',
+        });
+        if (Object.keys(shapes).length > 0 && !isONNXProxy()) {
+            // Only set preferredOutputLocation if shapes are present and we aren't proxying ONNX
+            /** @type {Record<string, import('onnxruntime-common').Tensor.DataLocation>} */
+            const preferredOutputLocation = {};
+            for (const key in shapes) {
+                preferredOutputLocation[key] = 'gpu-buffer';
+            }
+            session_options.preferredOutputLocation = preferredOutputLocation;
+        }
+    }
+
+    const buffer = await bufferPromise;
+
+    return { buffer, session_options, session_config };
+}
+
+/**
+ * Helper function to create multiple InferenceSession objects.
+ * 
+ * @param {string} pretrained_model_name_or_path The path to the directory containing the model file.
+ * @param {Record<string, string>} names The names of the model files to load.
+ * @param {import('./utils/hub.js').PretrainedModelOptions} options Additional options for loading the model.
+ * @returns {Promise<Record<string, any>>} A Promise that resolves to a dictionary of InferenceSession objects.
+ * @private
+ */
+async function constructSessions(pretrained_model_name_or_path, names, options) {
+    return Object.fromEntries(await Promise.all(
+        Object.keys(names).map(async (name) => {
+            const { buffer, session_options, session_config } = await getSession(pretrained_model_name_or_path, names[name], options);
+            const session = await createInferenceSession(buffer, session_options, session_config);
+            return [name, session];
+        })
+    ));
+}
+
+/**
+ * Helper function to load multiple optional configuration files
+ * @param {string} pretrained_model_name_or_path The path to the directory containing the config file.
+ * @param {Record<string, string>} names The names of the config files to load.
+ * @param {import('./utils/hub.js').PretrainedModelOptions} options Additional options for loading the configs.
+ * @returns {Promise<Record<string, any>>} A Promise that resolves to a dictionary of configuration objects.
+ * @private
+ */
+async function getOptionalConfigs(pretrained_model_name_or_path, names, options) {
+    return Object.fromEntries(await Promise.all(
+        Object.keys(names).map(async (name) => {
+            const config = await getModelJSON(pretrained_model_name_or_path, names[name], false, options);
+            return [name, config];
+        })
+    ));
 }
 
 /**
  * Validate model inputs
- * @param {InferenceSession} session The InferenceSession object that will be run.
- * @param {Record<string, Tensor>} inputs The inputs to check.
+ * @param {Object} session The InferenceSession object that will be run.
+ * @param {Object} inputs The inputs to check.
  * @returns {Record<string, Tensor>} The checked inputs.
  * @throws {Error} If any inputs are missing.
  * @private
@@ -170,7 +345,7 @@ function validateInputs(session, inputs) {
         // NOTE: When `env.wasm.proxy is true` the tensor is moved across the Worker
         // boundary, transferring ownership to the worker and invalidating the tensor.
         // So, in this case, we simply sacrifice a clone for it.
-        checkedInputs[inputName] = env.wasm.proxy ? tensor.clone() : tensor;
+        checkedInputs[inputName] = isONNXProxy() ? tensor.clone() : tensor;
     }
     if (missingInputs.length > 0) {
         throw new Error(
@@ -195,7 +370,7 @@ function validateInputs(session, inputs) {
  *  - If additional inputs are passed, they will be ignored.
  *  - If inputs are missing, an error will be thrown.
  * 
- * @param {InferenceSession} session The InferenceSession object to run.
+ * @param {Object} session The InferenceSession object to run.
  * @param {Object} inputs An object that maps input names to input tensors.
  * @returns {Promise<Object>} A Promise that resolves to an object that maps output names to output tensors.
  * @private
@@ -203,8 +378,9 @@ function validateInputs(session, inputs) {
 async function sessionRun(session, inputs) {
     const checkedInputs = validateInputs(session, inputs);
     try {
-        // @ts-ignore
-        let output = await session.run(checkedInputs);
+        // pass the original ort tensor
+        const ortFeed = Object.fromEntries(Object.entries(checkedInputs).map(([k, v]) => [k, v.ort_tensor]));
+        let output = await session.run(ortFeed);
         output = replaceTensors(output);
         return output;
     } catch (e) {
@@ -223,7 +399,7 @@ async function sessionRun(session, inputs) {
  */
 function replaceTensors(obj) {
     for (let prop in obj) {
-        if (obj[prop] instanceof ONNXTensor) {
+        if (isONNXTensor(obj[prop])) {
             obj[prop] = new Tensor(obj[prop]);
         } else if (typeof obj[prop] === 'object') {
             replaceTensors(obj[prop]);
@@ -268,72 +444,6 @@ function toI64Tensor(items) {
     }
 }
 
-/**
- * Prepares an attention mask for a sequence of tokens based on configuration options.
- * @param {Object} self The calling object instance.
- * @param {Tensor} tokens The input tokens.
- * @returns {Tensor} The attention mask tensor.
- * @private
- */
-function prepareAttentionMask(self, tokens) {
-
-    // Prepare attention mask
-    let pad_token_id = self.config.pad_token_id ?? null;
-    let eos_token_id = self.config.eos_token_id ?? null;
-    if (isIntegralNumber(eos_token_id)) {
-        eos_token_id = [eos_token_id];
-    }
-
-    let is_pad_token_in_inputs = tokens.indexOf(pad_token_id) !== -1;
-    let is_pad_token_not_equal_to_eos_token_id = (eos_token_id === null) || !eos_token_id.includes(pad_token_id)
-
-    if (is_pad_token_in_inputs && is_pad_token_not_equal_to_eos_token_id) {
-        let data = BigInt64Array.from(
-            // Note: != so that int matches bigint
-            // @ts-ignore
-            tokens.data.map(x => x != pad_token_id)
-        )
-        return new Tensor('int64', data, tokens.dims)
-    } else {
-        return ones_like(tokens);
-    }
-}
-
-/**
- * Add position IDs to the feeds object.
- * @param {Object} session The inference session.
- * @param {Object} feeds The input to the model.
- * @param {boolean} use_cache_branch Whether to use the cache branch of the model.
- * @returns {void}
- * @private
- */
-function preparePositionIds(session, feeds, use_cache_branch) {
-    if (!session.inputNames.includes('position_ids')) return;
-
-    const data = new BigInt64Array(feeds.attention_mask.data.length);
-
-    // Compute cumulative sum of the attention mask along the sequence length dimension
-    for (let i = 0; i < feeds.attention_mask.dims[0]; ++i) {
-        let start = i * feeds.attention_mask.dims[1];
-        let sum = BigInt(0);
-        for (let j = 0; j < feeds.attention_mask.dims[1]; ++j) {
-            const index = start + j;
-            if (feeds.attention_mask.data[index] === 0n) {
-                data[index] = BigInt(1);
-            } else { // === 1n
-                data[index] = sum;
-                sum += feeds.attention_mask.data[index];
-            }
-        }
-    }
-
-    feeds.position_ids = new Tensor('int64', data, feeds.attention_mask.dims);
-
-    if (use_cache_branch) {
-        feeds.position_ids = feeds.position_ids.slice(null, -1).unsqueeze_(-1);
-    }
-}
-
 /**
  * Creates a boolean tensor with a single value.
  * @param {boolean} value The value of the tensor.
@@ -353,162 +463,44 @@ function boolTensor(value) {
  * @private
  */
 async function seq2seqForward(self, model_inputs) {
-
-    let { encoder_outputs, past_key_values } = model_inputs;
-
+    let { encoder_outputs, input_ids, decoder_input_ids, ...other_decoder_inputs } = model_inputs;
+    // Encode if needed
     if (!encoder_outputs) {
+        const encoder_inputs = pick(model_inputs, self.sessions['model'].inputNames);
         // Encoder outputs are not given, so we must compute them.
-        encoder_outputs = (await encoderForward(self, model_inputs)).last_hidden_state;
+        encoder_outputs = (await encoderForward(self, encoder_inputs)).last_hidden_state;
     }
-    let decoderFeeds = {
-        input_ids: model_inputs.decoder_input_ids,
-        encoder_hidden_states: encoder_outputs,
-    };
-    const use_cache_branch = !!past_key_values;
-
-    if (self.decoder_merged_session.inputNames.includes('use_cache_branch')) {
-        decoderFeeds.use_cache_branch = boolTensor(use_cache_branch);
-    }
-
-    if (self.decoder_merged_session.inputNames.includes('encoder_attention_mask')) {
-        decoderFeeds.encoder_attention_mask = model_inputs.attention_mask
-    }
-
-    preparePositionIds(self.decoder_merged_session, decoderFeeds, use_cache_branch);
-    self.addPastKeyValues(decoderFeeds, past_key_values);
-
-    const decoderResults = await sessionRun(self.decoder_merged_session, decoderFeeds);
-    let logits = decoderResults.logits;
-    past_key_values = self.getPastKeyValues(decoderResults, past_key_values);
-
-    // Get cross attention and/or decoder attentions if they are present
-    const attns = self.getAttentions(decoderResults);
-
-    return new Seq2SeqLMOutput({ logits, past_key_values, encoder_outputs, ...attns });
-}
-
-/**
- * Start the beam search process for the seq2seq model.
- * @param {PreTrainedModel} self The seq2seq model object.
- * @param {Tensor} inputTokenIds Array of input token ids for each input sequence.
- * @param {Object} generation_config The generation config.
- * @param {number} numOutputTokens The maximum number of output tokens for the model.
- * @returns {Object[]} Array of beam search objects.
- * @private
- */
-function seq2seqStartBeams(self, inputTokenIds, generation_config, numOutputTokens) {
-    let beams = [];
-    let beamId = 0;
-
-    // @ts-ignore
-    const requires_attention_mask = self.requires_attention_mask ?? true;
-
-    // decoder_input_ids == output_token_ids
-    let decoder_input_ids =
-        generation_config.decoder_input_ids
-        ?? generation_config.decoder_start_token_id
-        ?? generation_config.bos_token_id
-        ?? generation_config.eos_token_id;
-
-    // Support input as tensor or list
-    // TODO support batched decoder_input_ids
-    if (decoder_input_ids instanceof Tensor) {
-        decoder_input_ids = decoder_input_ids.tolist().flat();
-    } else if (!Array.isArray(decoder_input_ids)) {
-        decoder_input_ids = [decoder_input_ids];
-    }
-
-    for (let tokens of inputTokenIds) {
-        // TODO: Improve
-        // Currently, just add back batch dimension.
-        // In future, allow for true parallel execution
-        tokens.dims = [1, ...tokens.dims]
-
-        // Create beam
-        let start = {
-            inputs: tokens,
-            encoder_outputs: null,
-            prev_model_outputs: null,
-
-            output_token_ids: decoder_input_ids,
-            done: false,
-            score: 0,
-            id: beamId++ // assign unique id to beams
-        }
-
-        if (requires_attention_mask) {
-            start.attention_mask = prepareAttentionMask(self, tokens);
-        }
-
-        beams.push(start);
-    }
-
-    return beams;
-}
-
-/**
- * Run beam search on the seq2seq model for a single beam.
- * @param {PreTrainedModel} self The seq2seq model object.
- * @param {Object} beam The beam search object for which to run the model.
- * @param {Object} options options
- * @param {string} [options.input_name='input_ids'] The name of the input tensor for the encoder.
- * @returns {Promise<Object>} Promise that resolves with the output of the seq2seq model for the given beam.
- * @private
- */
-async function seq2seqRunBeam(self, beam) {
-    const input_name = self.main_input_name;
 
-    let decoder_input_ids = beam.output_token_ids;
-    if (beam.prev_model_outputs) {
-        // After the first step, `prev_model_outputs` won't be null.
-        // So, we cut decoder_input_ids if past is used
-        decoder_input_ids = decoder_input_ids.slice(-1);
-    }
+    other_decoder_inputs.input_ids = decoder_input_ids;
+    other_decoder_inputs.encoder_hidden_states = encoder_outputs;
 
-    // 1. Prepare
-    let model_inputs = {
-        [input_name]: beam.inputs,
-        decoder_input_ids: toI64Tensor(decoder_input_ids),
-        encoder_outputs: beam.encoder_outputs,
-        past_key_values: beam.prev_model_outputs?.past_key_values,
+    if (self.sessions['decoder_model_merged'].inputNames.includes('encoder_attention_mask')) {
+        other_decoder_inputs.encoder_attention_mask = model_inputs.attention_mask
     }
-    if (beam.attention_mask) {
-        model_inputs.attention_mask = beam.attention_mask
-    }
-
-    // 2. Run
-    let output = await self.forward(model_inputs);
 
-    // 3. Update
-    beam.prev_model_outputs = output;
-    beam.encoder_outputs = output.encoder_outputs;
+    const decoderResults = await decoderForward(self, other_decoder_inputs, true);
 
-    return output;
-}
-
-/**
- * Update a beam with a new token ID.
- * @param {Object} beam The beam to update.
- * @param {number} newTokenId The new token ID to add to the beam's output.
- * @private
- */
-function seq2seqUpdatebeam(beam, newTokenId) {
-    beam.output_token_ids = [...beam.output_token_ids, newTokenId];
+    return decoderResults;
 }
 
 /**
  * Forward pass of an encoder model.
  * @param {Object} self The encoder model.
  * @param {Object} model_inputs The input data to be used for the forward pass.
- * @returns {Promise<Object>} Promise that resolves with an object containing the model's outputs.
+ * @returns {Promise<Object>} The model's outputs.
  * @private
  */
 async function encoderForward(self, model_inputs) {
-    const encoderFeeds = Object.create(null);
-    for (const key of self.session.inputNames) {
-        encoderFeeds[key] = model_inputs[key];
+    const session = self.sessions['model'];
+    const encoderFeeds = pick(model_inputs, session.inputNames);
+
+    if (session.inputNames.includes('inputs_embeds') && !encoderFeeds.inputs_embeds) {
+        if (!model_inputs.input_ids) {
+            throw new Error('Both `input_ids` and `inputs_embeds` are missing in the model inputs.');
+        }
+        encoderFeeds.inputs_embeds = await self.encode_text({ input_ids: model_inputs.input_ids });
     }
-    if (self.session.inputNames.includes('token_type_ids') && !encoderFeeds.token_type_ids) {
+    if (session.inputNames.includes('token_type_ids') && !encoderFeeds.token_type_ids) {
         // Assign default `token_type_ids` (all zeroes) to the `encoderFeeds` if the model expects it,
         // but they weren't created by the tokenizer.
         encoderFeeds.token_type_ids = new Tensor(
@@ -517,136 +509,211 @@ async function encoderForward(self, model_inputs) {
             encoderFeeds.input_ids.dims
         )
     }
-    return await sessionRun(self.session, encoderFeeds);
+    return await sessionRun(session, encoderFeeds);
 }
 
-
 /**
  * Forward pass of a decoder model.
  * @param {Object} self The decoder model.
  * @param {Object} model_inputs The input data to be used for the forward pass.
- * @returns {Promise<Object>} Promise that resolves with an object containing the logits and past key values.
+ * @returns {Promise<Object>} The logits and past key values.
  * @private
  */
-async function decoderForward(self, model_inputs) {
-    let { input_ids, past_key_values, attention_mask } = model_inputs;
-    let decoderFeeds = {
-        input_ids: input_ids,
-        attention_mask: attention_mask ?? prepareAttentionMask(self, input_ids),
-    }
-    const use_cache_branch = !!past_key_values;
-
-    if (self.session.inputNames.includes('use_cache_branch')) {
-        decoderFeeds.use_cache_branch = boolTensor(use_cache_branch);
-    }
+async function decoderForward(self, model_inputs, is_encoder_decoder = false) {
 
-    preparePositionIds(self.session, decoderFeeds, use_cache_branch);
+    const session = self.sessions[
+        is_encoder_decoder ? 'decoder_model_merged' : 'model'
+    ]
 
-    self.addPastKeyValues(decoderFeeds, past_key_values);
+    const { past_key_values, ...new_model_inputs } = model_inputs;
 
-    let decoderResults = await sessionRun(self.session, decoderFeeds);
+    if (session.inputNames.includes('use_cache_branch')) {
+        new_model_inputs.use_cache_branch = boolTensor(!!past_key_values);
+    }
+    if (session.inputNames.includes('position_ids') && new_model_inputs.attention_mask && !new_model_inputs.position_ids) {
+        new_model_inputs.position_ids = createPositionIds(new_model_inputs, past_key_values);
+    }
 
-    let logits = decoderResults.logits;
+    // Unpack the `past_key_values` object into model inputs
+    self.addPastKeyValues(new_model_inputs, past_key_values);
 
-    past_key_values = self.getPastKeyValues(decoderResults, past_key_values);
-    return { logits, past_key_values };
+    // Select only the inputs that are needed for the current session
+    const fixed = pick(new_model_inputs, session.inputNames);
+    return await sessionRun(session, fixed);
 }
 
+
 /**
- * Starts the generation of text by initializing the beams for the given input token IDs.
- * @param {Object} self The text generation model object.
- * @param {Tensor} inputTokenIds An tensor of input token IDs to generate text from.
- * @param {Object} generation_config The generation config.
- * @param {number} numOutputTokens The maximum number of tokens to generate for each beam.
- * @param {Tensor} [inputs_attention_mask] The attention mask tensor for the input token IDs.
- * @returns {Object[]} An array of beams initialized with the given inputs and parameters.
+ * Forward pass of an image-text-to-text model.
+ * @param {Object} self The image-text-to-text model model.
+ * @param {Object} model_inputs The input data to be used for the forward pass.
+ * @param {Tensor} [model_inputs.input_ids=null]
+ * @param {Tensor} [model_inputs.attention_mask=null]
+ * @param {Tensor} [model_inputs.pixel_values=null]
+ * @param {Tensor} [model_inputs.position_ids=null]
+ * @param {Tensor} [model_inputs.inputs_embeds=null]
+ * @param {Tensor} [model_inputs.past_key_values=null]
+ * @param {Object} [model_inputs.generation_config=null]
+ * @param {Object} [model_inputs.logits_processor=null]
+ * @returns {Promise<Tensor>} The model's output tensor
  * @private
  */
-function decoderStartBeams(self, inputTokenIds, generation_config, numOutputTokens, inputs_attention_mask) {
-    let beams = [];
-
-    let beamId = 0;
-    for (let tokens of inputTokenIds) {
-        let output_token_ids = tokens.tolist().map(Number);
-
-        // TODO: Improve
-        // Currently, just add back batch dimension.
-        // In future, allow for true parallel execution
-        tokens.dims = [1, ...tokens.dims]
+async function imageTextToTextForward(self, {
+    // Produced by the tokenizer/processor:
+    input_ids = null,
+    attention_mask = null,
+    pixel_values = null,
+
+    // Used during generation:
+    position_ids = null,
+    inputs_embeds = null,
+    past_key_values = null,
+
+    // Generic generation parameters
+    generation_config = null,
+    logits_processor = null,
+
+    // TODO: needed?
+    ...kwargs
+}) {
+
+    if (!inputs_embeds) {
+        // 1. Extract the input embeddings
+        inputs_embeds = await self.encode_text({ input_ids });
+
+        // 2. Possibly, merge text and images
+        if (pixel_values && input_ids.dims[1] !== 1) {
+            const image_features = await self.encode_image({ pixel_values });
+
+            ({ inputs_embeds, attention_mask } = self._merge_input_ids_with_image_features({
+                image_features,
+                inputs_embeds,
+                input_ids,
+                attention_mask,
+            }));
 
-        let attn_mask;
-        if (inputs_attention_mask) {
-            attn_mask = inputs_attention_mask[beamId];
-            attn_mask.dims = [1, ...attn_mask.dims]
+        } else if (past_key_values && pixel_values && input_ids.dims[1] === 1) {
+            // This is the case when we are generating with cache
+            const target_length = input_ids.dims[1]; // always 1
+            const past_length = Object.values(past_key_values)[0].dims.at(-2);
 
-        } else {
-            attn_mask = prepareAttentionMask(self, tokens)
+            attention_mask = cat([
+                ones([input_ids.dims[0], past_length]),
+                attention_mask.slice(null, [attention_mask.dims[1] - target_length, attention_mask.dims[1]]),
+            ], 1);
         }
+    }
 
-        let start = {
-            input: tokens,
-            model_input_ids: tokens,
-            attention_mask: attn_mask,
-            prev_model_outputs: null,
-
-            output_token_ids: output_token_ids,
-            num_output_tokens: numOutputTokens,
-
-            done: false,
-            score: 0,
-            id: beamId++ // assign unique id to beams
+    const outputs = await decoderForward(self, {
+        inputs_embeds,
+        past_key_values,
+        attention_mask,
+        position_ids,
+        generation_config,
+        logits_processor,
+    }, true);
+    return outputs;
+}
+
+function createPositionIds(model_inputs, past_key_values = null) {
+    // If the model supports providing position_ids, we create position_ids on the fly for batch generation,
+    // by computing the cumulative sum of the attention mask along the sequence length dimension.
+    // 
+    // Equivalent to:
+    // position_ids = attention_mask.long().cumsum(-1) - 1
+    // position_ids.masked_fill_(attention_mask == 0, 1)
+    // if past_key_values:
+    //     position_ids = position_ids[:, -input_ids.shape[1] :]
+    const { input_ids, inputs_embeds, attention_mask } = model_inputs;
+    const [bz, seq_len] = attention_mask.dims;
+
+    const data = new BigInt64Array(attention_mask.data.length);
+    for (let i = 0; i < bz; ++i) {
+        const start = i * seq_len;
+        let sum = BigInt(0);
+        for (let j = 0; j < seq_len; ++j) {
+            const index = start + j;
+            if (attention_mask.data[index] === 0n) {
+                data[index] = BigInt(1);
+            } else { // === 1n
+                data[index] = sum;
+                sum += attention_mask.data[index];
+            }
         }
+    }
 
-        beams.push(start);
+    let position_ids = new Tensor('int64', data, attention_mask.dims);
+    if (past_key_values) {
+        const offset = -(input_ids ?? inputs_embeds).dims.at(1);
+        position_ids = position_ids.slice(null, [offset, null]);
     }
-    return beams;
+    return position_ids;
 }
 
-/**
- * Runs a single step of the text generation process for a given beam.
- *
- * @param {Object} self The decoder object.
- * @param {Object} beam The beam to run.
- * @param {Tensor} beam.input The input tensor.
- * @param {Tensor} beam.model_input_ids The input ids to the model.
- * @param {Tensor} beam.attention_mask The attention mask.
- * @param {Object} beam.prev_model_outputs The past key values.
- * @param {number[]} beam.output_token_ids The output token ids.
- * @returns {Promise<Object>} The output of the generation step.
- * @private
- */
-async function decoderRunBeam(self, beam) {
-    let attnMaskData = new BigInt64Array(beam.output_token_ids.length).fill(1n)
+function decoder_prepare_inputs_for_generation(self, input_ids, model_inputs, generation_config) {
+    if (model_inputs.past_key_values) {
+        const past_length = Object.values(model_inputs.past_key_values)[0].dims.at(-2);
+        const { input_ids, attention_mask } = model_inputs;
 
-    // 1. Prepare
-    let model_inputs = {
-        input_ids: beam.model_input_ids,
-        attention_mask: new Tensor(
-            'int64',
-            attnMaskData,
-            [1, attnMaskData.length]
-        ),
-        past_key_values: beam.prev_model_outputs?.past_key_values,
+        // Keep only the unprocessed tokens:
+        // 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
+        // some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as
+        // input)
+        if (attention_mask && attention_mask.dims[1] > input_ids.dims[1]) {
+            // NOTE: not needed since we only pass the generated tokens to the next forward pass
+            // const offset = -(attention_mask.dims[1] - past_length);
+            // model_inputs.input_ids = input_ids.slice(null, [offset, null]);
+        }
+        // 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens.
+        // We can discard input_ids based on the past_length.
+        else if (past_length < input_ids.dims[1]) {
+            // NOTE: Required for phi models.
+            // See https://github.com/huggingface/transformers/issues/30809#issuecomment-2111918479 for more information.
+            model_inputs.input_ids = input_ids.slice(null, [past_length, null]);
+        }
+        // 3 - Otherwise (past_length >= input_ids.shape[1]), let's assume input_ids only has unprocessed tokens.
+        else {
+            if (
+                // NOTE: Only used by VLMs (!= so that null matches undefined)
+                self.config.image_token_index != null &&
+                // Equivalent to `self.config.image_token_index in input_ids` (== so that int matches bigint)
+                input_ids.data.some(x => x == self.config.image_token_index)
+            ) {
+                // TODO: Support multiple image tokens
+                const num_image_tokens = self.config.num_image_tokens;
+                if (!num_image_tokens) {
+                    throw new Error('`num_image_tokens` is missing in the model configuration.');
+                }
+
+                const num_new_tokens = input_ids.dims[1] - (past_length - num_image_tokens);
+                model_inputs.input_ids = input_ids.slice(null, [-num_new_tokens, null]);
+
+                // TODO: The attention mask should be formed from the attention mask passed in model_inputs
+                model_inputs.attention_mask = ones([1, past_length + num_new_tokens]);
+            }
+        }
     }
 
-    // 2. Run
-    let output = await self.forward(model_inputs);
+    return model_inputs;
+}
 
-    // 3. Update
-    beam.prev_model_outputs = output;
+function encoder_decoder_prepare_inputs_for_generation(self, input_ids, model_inputs, generation_config) {
+    if (model_inputs.past_key_values) {
+        input_ids = input_ids.map(x => [x.at(-1)]);
+    }
 
-    return output;
+    return {
+        ...model_inputs,
+        decoder_input_ids: toI64Tensor(input_ids),
+    };
 }
 
-/**
- * Update a beam with a new token ID.
- * @param {Object} beam The beam to update.
- * @param {number} newTokenId The new token ID to add to the beam's output.
- * @private
- */
-function decoderUpdatebeam(beam, newTokenId) {
-    beam.output_token_ids = [...beam.output_token_ids, newTokenId];
-    beam.model_input_ids = new Tensor('int64', [BigInt(newTokenId)], [1, 1]);
+function image_text_to_text_prepare_inputs_for_generation(self, ...args) {
+    if (self.config.is_encoder_decoder) {
+        return encoder_decoder_prepare_inputs_for_generation(self, ...args);
+    } else {
+        return decoder_prepare_inputs_for_generation(self, ...args);
+    }
 }
 
 //////////////////////////////////////////////////
@@ -657,48 +724,63 @@ function decoderUpdatebeam(beam, newTokenId) {
  */
 export class PreTrainedModel extends Callable {
     main_input_name = 'input_ids';
-
+    forward_params = ['input_ids', 'attention_mask'];
     /**
      * Creates a new instance of the `PreTrainedModel` class.
-     * @param {Object} config The model configuration.
-     * @param {any} session session for the model.
+     * @param {import('./configs.js').PretrainedConfig} config The model configuration.
+     * @param {Record<string, any>} sessions The inference sessions for the model.
+     * @param {Record<string, Object>} configs Additional configuration files (e.g., generation_config.json).
      */
-    constructor(config, session) {
+    constructor(config, sessions, configs) {
         super();
 
         this.config = config;
-        this.session = session;
+        this.sessions = sessions;
+        this.configs = configs;
 
         const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this.constructor);
         const modelType = MODEL_TYPE_MAPPING.get(modelName);
 
         this.can_generate = false;
-        this._runBeam = null;
-        this._getStartBeams = null;
-        this._updateBeam = null;
         this._forward = null;
-        if (modelType === MODEL_TYPES.DecoderOnly) {
-            this.can_generate = true;
 
-            this._runBeam = decoderRunBeam;
-            this._getStartBeams = decoderStartBeams;
-            this._updateBeam = decoderUpdatebeam;
-            this._forward = decoderForward;
+        this._prepare_inputs_for_generation = null;
+        switch (modelType) {
+            case MODEL_TYPES.DecoderOnly:
+                this.can_generate = true;
+                this._forward = decoderForward;
+                this._prepare_inputs_for_generation = decoder_prepare_inputs_for_generation;
+                break;
+            case MODEL_TYPES.Seq2Seq:
+            case MODEL_TYPES.Vision2Seq:
+            case MODEL_TYPES.Musicgen:
+                this.can_generate = true;
 
-        } else if (modelType === MODEL_TYPES.Seq2Seq || modelType === MODEL_TYPES.Vision2Seq) {
-            this.can_generate = true;
+                this._forward = seq2seqForward;
+                this._prepare_inputs_for_generation = encoder_decoder_prepare_inputs_for_generation;
+                break;
 
-            this._runBeam = seq2seqRunBeam;
-            this._getStartBeams = seq2seqStartBeams;
-            this._updateBeam = seq2seqUpdatebeam;
-            this._forward = seq2seqForward;
+            case MODEL_TYPES.EncoderDecoder:
+                this._forward = seq2seqForward;
+                break;
+            case MODEL_TYPES.ImageTextToText:
+                this.can_generate = true;
+                this._forward = imageTextToTextForward;
+                this._prepare_inputs_for_generation = image_text_to_text_prepare_inputs_for_generation;
+                break;
 
-        } else if (modelType === MODEL_TYPES.EncoderDecoder) {
-            this._forward = encoderForward;
+            default:
+                // should be MODEL_TYPES.EncoderOnly
+                this._forward = encoderForward;
+                break;
+        }
 
-        } else { // should be MODEL_TYPES.EncoderOnly
-            this._forward = encoderForward;
+        if (this.can_generate) {
+            this.forward_params.push('past_key_values');
         }
+
+        /** @type {import('./configs.js').TransformersJSConfig} */
+        this.custom_config = this.config['transformers.js_config'] ?? {};
     }
 
     /**
@@ -708,11 +790,9 @@ export class PreTrainedModel extends Callable {
     */
     async dispose() {
         const promises = [];
-        for (let key of Object.keys(this)) {
-            const item = this[key];
-            // @ts-ignore
-            if (item instanceof InferenceSession) {
-                promises.push(item.handler.dispose())
+        for (const session of Object.values(this.sessions)) {
+            if (session?.handler?.dispose) {
+                promises.push(session.handler.dispose())
             }
         }
         return await Promise.all(promises);
@@ -729,75 +809,122 @@ export class PreTrainedModel extends Callable {
      *   Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
      *   user or organization name, like `dbmdz/bert-base-german-cased`.
      * - A path to a *directory* containing model weights, e.g., `./my_model_directory/`.
-     * @param {import('./utils/hub.js').PretrainedOptions} options Additional options for loading the model.
+     * @param {import('./utils/hub.js').PretrainedModelOptions} options Additional options for loading the model.
      * 
      * @returns {Promise<PreTrainedModel>} A new instance of the `PreTrainedModel` class.
      */
     static async from_pretrained(pretrained_model_name_or_path, {
-        quantized = true,
         progress_callback = null,
         config = null,
         cache_dir = null,
         local_files_only = false,
         revision = 'main',
         model_file_name = null,
+        subfolder = 'onnx',
+        device = null,
+        dtype = null,
+        use_external_data_format = null,
+        session_options = {},
     } = {}) {
 
         let options = {
-            quantized,
             progress_callback,
             config,
             cache_dir,
             local_files_only,
             revision,
             model_file_name,
+            subfolder,
+            device,
+            dtype,
+            use_external_data_format,
+            session_options,
         }
 
         const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this);
         const modelType = MODEL_TYPE_MAPPING.get(modelName);
 
+        config = options.config = await AutoConfig.from_pretrained(pretrained_model_name_or_path, options);
+
         let info;
         if (modelType === MODEL_TYPES.DecoderOnly) {
             info = await Promise.all([
-                AutoConfig.from_pretrained(pretrained_model_name_or_path, options),
-                constructSession(pretrained_model_name_or_path, options.model_file_name ?? 'decoder_model_merged', options),
-                getModelJSON(pretrained_model_name_or_path, 'generation_config.json', false, options),
+                constructSessions(pretrained_model_name_or_path, {
+                    model: options.model_file_name ?? 'model',
+                }, options),
+                getOptionalConfigs(pretrained_model_name_or_path, {
+                    generation_config: 'generation_config.json',
+                }, options),
             ]);
 
         } else if (modelType === MODEL_TYPES.Seq2Seq || modelType === MODEL_TYPES.Vision2Seq) {
             info = await Promise.all([
-                AutoConfig.from_pretrained(pretrained_model_name_or_path, options),
-                constructSession(pretrained_model_name_or_path, 'encoder_model', options),
-                constructSession(pretrained_model_name_or_path, 'decoder_model_merged', options),
-                getModelJSON(pretrained_model_name_or_path, 'generation_config.json', false, options),
+                constructSessions(pretrained_model_name_or_path, {
+                    model: 'encoder_model',
+                    decoder_model_merged: 'decoder_model_merged',
+                }, options),
+                getOptionalConfigs(pretrained_model_name_or_path, {
+                    generation_config: 'generation_config.json',
+                }, options),
             ]);
 
         } else if (modelType === MODEL_TYPES.MaskGeneration) {
             info = await Promise.all([
-                AutoConfig.from_pretrained(pretrained_model_name_or_path, options),
-                constructSession(pretrained_model_name_or_path, 'vision_encoder', options),
-                constructSession(pretrained_model_name_or_path, 'prompt_encoder_mask_decoder', options),
+                constructSessions(pretrained_model_name_or_path, {
+                    model: 'vision_encoder',
+                    prompt_encoder_mask_decoder: 'prompt_encoder_mask_decoder',
+                }, options),
             ]);
 
         } else if (modelType === MODEL_TYPES.EncoderDecoder) {
             info = await Promise.all([
-                AutoConfig.from_pretrained(pretrained_model_name_or_path, options),
-                constructSession(pretrained_model_name_or_path, 'encoder_model', options),
-                constructSession(pretrained_model_name_or_path, 'decoder_model_merged', options),
+                constructSessions(pretrained_model_name_or_path, {
+                    model: 'encoder_model',
+                    decoder_model_merged: 'decoder_model_merged',
+                }, options),
+            ]);
+
+        } else if (modelType === MODEL_TYPES.ImageTextToText) {
+            const sessions = {
+                embed_tokens: 'embed_tokens',
+                vision_encoder: 'vision_encoder',
+                decoder_model_merged: 'decoder_model_merged',
+            }
+            if (config.is_encoder_decoder) {
+                sessions['model'] = 'encoder_model';
+            }
+            info = await Promise.all([
+                constructSessions(pretrained_model_name_or_path, sessions, options),
+                getOptionalConfigs(pretrained_model_name_or_path, {
+                    generation_config: 'generation_config.json',
+                }, options),
+            ]);
+
+        } else if (modelType === MODEL_TYPES.Musicgen) {
+            info = await Promise.all([
+                constructSessions(pretrained_model_name_or_path, {
+                    model: 'text_encoder',
+                    decoder_model_merged: 'decoder_model_merged',
+                    encodec_decode: 'encodec_decode',
+                }, options),
+                getOptionalConfigs(pretrained_model_name_or_path, {
+                    generation_config: 'generation_config.json',
+                }, options),
             ]);
 
         } else { // should be MODEL_TYPES.EncoderOnly
             if (modelType !== MODEL_TYPES.EncoderOnly) {
-                console.warn(`Model type for '${modelName ?? config?.model_type}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`)
+                console.warn(`Model type for '${modelName ?? config?.model_type}' not found, assuming encoder-only architecture. Please report this at ${GITHUB_ISSUE_URL}.`)
             }
             info = await Promise.all([
-                AutoConfig.from_pretrained(pretrained_model_name_or_path, options),
-                constructSession(pretrained_model_name_or_path, options.model_file_name ?? 'model', options)
+                constructSessions(pretrained_model_name_or_path, {
+                    model: options.model_file_name ?? 'model',
+                }, options),
             ]);
         }
 
         // @ts-ignore
-        return new this(...info);
+        return new this(config, ...info);
     }
 
     /**
@@ -821,7 +948,41 @@ export class PreTrainedModel extends Callable {
     }
 
     /**
-     * @param {import('./utils/generation.js').GenerationConfigType} generation_config 
+     * Get the model's generation config, if it exists.
+     * @returns {GenerationConfig|null} The model's generation config if it exists, otherwise `null`.
+     */
+    get generation_config() {
+        return this.configs?.generation_config ?? null;
+    }
+
+    /**
+     * This function returns a [`LogitsProcessorList`] list object that contains all relevant [`LogitsWarper`]
+     * instances used for multinomial sampling.
+     * @param {GenerationConfig} generation_config The generation config.
+     * @returns {LogitsProcessorList} generation_config 
+     */
+    _get_logits_warper(generation_config) {
+
+        // instantiate warpers list
+        const warpers = new LogitsProcessorList();
+
+        if (generation_config.temperature !== null && generation_config.temperature !== 1.0) {
+            warpers.push(new TemperatureLogitsWarper(generation_config.temperature));
+        }
+        if (generation_config.top_k !== null && generation_config.top_k !== 0) {
+            // TODO: add min_tokens_to_keep
+            warpers.push(new TopKLogitsWarper(generation_config.top_k));
+        }
+        if (generation_config.top_p !== null && generation_config.top_p < 1.0) {
+            // TODO: add min_tokens_to_keep
+            warpers.push(new TopPLogitsWarper(generation_config.top_p));
+        }
+
+        return warpers;
+    }
+
+    /**
+     * @param {GenerationConfig} generation_config 
      * @param {number} input_ids_seq_length The starting sequence length for the input ids.
      * @returns {LogitsProcessorList}
      * @private
@@ -921,19 +1082,22 @@ export class PreTrainedModel extends Callable {
         // }
 
         if (generation_config.begin_suppress_tokens !== null) {
-            let begin_index = (input_ids_seq_length > 1 || generation_config.forced_bos_token_id === null)
+            const begin_index = (input_ids_seq_length > 1 || generation_config.forced_bos_token_id === null)
                 ? input_ids_seq_length
                 : input_ids_seq_length + 1;
 
-            if (generation_config.forced_decoder_ids !== null) {
-                // generation starts after the last token that is forced
-                begin_index += generation_config.forced_decoder_ids[generation_config.forced_decoder_ids.length - 1][0];
-            }
             processors.push(new SuppressTokensAtBeginLogitsProcessor(generation_config.begin_suppress_tokens, begin_index));
         }
 
-        if (generation_config.forced_decoder_ids !== null) {
-            processors.push(new ForceTokensLogitsProcessor(generation_config.forced_decoder_ids));
+        // DEPRECATED: https://github.com/huggingface/transformers/pull/29485
+        // if (generation_config.forced_decoder_ids !== null) {
+        //     processors.push(new ForceTokensLogitsProcessor(generation_config.forced_decoder_ids));
+        // }
+
+
+        // 8. prepare batched CFG externally
+        if (generation_config.guidance_scale !== null && generation_config.guidance_scale > 1) {
+            processors.push(new ClassifierFreeGuidanceLogitsProcessor(generation_config.guidance_scale));
         }
 
         if (logits_processor !== null) {
@@ -951,287 +1115,473 @@ export class PreTrainedModel extends Callable {
     /**
      * This function merges multiple generation configs together to form a final generation config to be used by the model for text generation.
      * It first creates an empty `GenerationConfig` object, then it applies the model's own `generation_config` property to it. Finally, if a `generation_config` object was passed in the arguments, it overwrites the corresponding properties in the final config with those of the passed config object.
-     * @param {import('./utils/generation.js').GenerationConfigType} generation_config A `GenerationConfig` object containing generation parameters.
-     * @returns {import('./utils/generation.js').GenerationConfigType} The final generation config object to be used by the model for text generation.
+     * @param {GenerationConfig|null} generation_config A `GenerationConfig` object containing generation parameters.
+     * @param {Object} kwargs Additional generation parameters to be used in place of those in the `generation_config` object.
+     * @returns {GenerationConfig} The final generation config object to be used by the model for text generation.
      */
-    _get_generation_config(generation_config) {
+    _prepare_generation_config(generation_config, kwargs, cls = GenerationConfig) {
         // Create empty generation config (contains defaults)
         // We pass `this.config` so that if `eos_token_id` or `bos_token_id` exist in the model's config, we will use them
-        let gen_config = new GenerationConfig(this.config);
+        const config = { ...this.config };
+        for (const key of ["decoder", "generator", "text_config"]) {
+            // Special case: some models have generation attributes set in the decoder.
+            // Use them if still unset in the generation config.
+            if (key in config) {
+                Object.assign(config, config[key]);
+            }
+        }
+
+        const gen_config = new cls(config);
 
         // Apply model's generation config, if it exists
-        if ('generation_config' in this) {
-            Object.assign(gen_config, this.generation_config);
-        }
+        Object.assign(gen_config, this.generation_config ?? {});
 
-        // Finally, use any generation config specified by the user
+        // Next, use any generation config specified by the user
         // when calling `generate`
-        if (generation_config !== null) {
+        if (generation_config) {
             Object.assign(gen_config, generation_config);
         }
+
+        // Finally, if any kwargs were passed, use them to overwrite
+        if (kwargs) {
+            Object.assign(gen_config, pick(kwargs, Object.getOwnPropertyNames(gen_config)));
+        }
+
         return gen_config;
     }
 
     /**
-     * @typedef {import('./utils/maths.js').TypedArray} TypedArray
+     * 
+     * @param {GenerationConfig} generation_config 
+     * @param {StoppingCriteriaList} [stopping_criteria=null] 
      */
+    _get_stopping_criteria(generation_config, stopping_criteria = null) {
+        const criteria = new StoppingCriteriaList();
+
+        if (generation_config.max_length !== null) {
+            criteria.push(new MaxLengthCriteria(
+                generation_config.max_length,
+                this.config.max_position_embeddings ?? null,
+            ));
+        }
+        // if (generation_config.max_time !== null) {
+        //     criteria.push(new MaxTimeCriteria(generation_config.max_time));
+        // }
+        if (generation_config.eos_token_id !== null) {
+            criteria.push(new EosTokenCriteria(generation_config.eos_token_id));
+        }
+
+        if (stopping_criteria) {
+            criteria.extend(stopping_criteria);
+        }
+        return criteria;
+
+    }
 
     /**
-     * @typedef {{ sequences: Tensor, decoder_attentions: Tensor, cross_attentions: Tensor }} EncoderDecoderOutput
-     * @typedef {Object} DecoderOutput
-     * 
-     * Generates text based on the given inputs and generation configuration using the model.
-     * @param {Tensor|Array|TypedArray} inputs An array of input token IDs.
-     * @param {Object|GenerationConfig|null} generation_config The generation configuration to use. If null, default configuration will be used.
-     * @param {Object|null} logits_processor An optional logits processor to use. If null, a new LogitsProcessorList instance will be created.
-     * @param {Object} options options
-     * @param {Object} [options.inputs_attention_mask=null] An optional attention mask for the inputs.
-     * @returns {Promise<number[][]|EncoderDecoderOutput|DecoderOutput>} An array of generated output sequences, where each sequence is an array of token IDs.
-     * @throws {Error} Throws an error if the inputs array is empty.
-     */
-    async generate(
-        inputs,
-        generation_config = null,
-        logits_processor = null,
-        {
-            inputs_attention_mask = null
-        } = {},
-    ) {
+     * Confirms that the model class is compatible with generation.
+     * If not, raises an exception that points to the right class to use.
+     */
+    _validate_model_class() {
         if (!this.can_generate) {
+            const generate_compatible_mappings = [
+                MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
+                // MODEL_FOR_CAUSAL_IMAGE_MODELING_MAPPING, // TODO
+                MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES,
+                MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES,
+                MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES,
+            ];
+
             const modelName = MODEL_CLASS_TO_NAME_MAPPING.get(this.constructor);
-            let errorMessage = `The current model class (${modelName}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`
 
+            const generate_compatible_classes = new Set();
             const modelType = this.config.model_type;
-            const possibleInfo =
-                MODEL_WITH_LM_HEAD_MAPPING_NAMES.get(modelType)
-                ?? MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES.get(modelType)
-                ?? MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES.get(modelType)
-                // ?? MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES.get(modelType) // TODO
-                ?? MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES.get(modelType);
-
-            if (possibleInfo) {
-                // TODO: support multiple possible classes
-                errorMessage += ` Please use the following class instead: '${possibleInfo[0]}'`;
+            for (const model_mapping of generate_compatible_mappings) {
+                const supported_models = model_mapping.get(modelType);
+                if (supported_models) {
+                    generate_compatible_classes.add(supported_models[0]);
+                }
+            }
+
+            let errorMessage = `The current model class (${modelName}) is not compatible with \`.generate()\`, as it doesn't have a language model head.`
+            if (generate_compatible_classes.size > 0) {
+                errorMessage += ` Please use the following class instead: ${[...generate_compatible_classes].join(', ')}`;
             }
             throw Error(errorMessage);
         }
+    }
 
-        if (!(inputs instanceof Tensor) && !isTypedArray(inputs) && !Array.isArray(inputs)) {
-            throw Error(`\`inputs\` must be a Tensor, TypedArray, or Array, but is "${inputs.constructor.name}".`);
-        }
+    prepare_inputs_for_generation(...args) {
+        return this._prepare_inputs_for_generation(this, ...args);
+    }
 
-        let input_ids_seq_length;
+    /**
+     * 
+     * @param {Object} inputs
+     * @param {bigint[][]} inputs.generated_input_ids
+     * @param {Object} inputs.outputs
+     * @param {Object} inputs.model_inputs
+     * @param {boolean} inputs.is_encoder_decoder
+     * @returns {Object} The updated model inputs for the next generation iteration.
+     */
+    _update_model_kwargs_for_generation({ generated_input_ids, outputs, model_inputs, is_encoder_decoder }) {
+        // update past_key_values
+        model_inputs['past_key_values'] = this.getPastKeyValues(outputs, model_inputs.past_key_values);
+
+        // update inputs for next run
+        model_inputs['input_ids'] = new Tensor('int64', generated_input_ids.flat(), [generated_input_ids.length, 1]);
+
+        if (!is_encoder_decoder) {
+            // update attention mask
+            model_inputs.attention_mask = cat(
+                [
+                    model_inputs.attention_mask,
+                    ones([model_inputs.attention_mask.dims[0], 1]),
+                ], 1
+            );
+        } else if ('decoder_attention_mask' in model_inputs) {
+            // TODO: update decoder attention mask if the model requires it
+        }
 
-        // Prepare `input_ids` which will be used for auto-regressive generation
-        // TODO: Update to align with HF transformers' implementation
-        if (this.config.is_encoder_decoder) {
-            // Generating from the encoder outputs
-            input_ids_seq_length = 0;
+        // force recreate position_ids in next iteration
+        model_inputs['position_ids'] = null;
 
-        } else {
-            input_ids_seq_length = inputs instanceof Tensor ? inputs.dims.at(-1) : inputs.length;
+        return model_inputs;
+    }
 
-            // decoder-only
-            if (input_ids_seq_length === 0) {
-                throw Error("Must supply a non-empty array of input token ids.")
+    /**
+     * This function extracts the model-specific `inputs` for generation.
+     * @param {Object} params
+     * @param {Tensor} [params.inputs=null]
+     * @param {number} [params.bos_token_id=null]
+     * @param {Record<string, Tensor|number[]>} [params.model_kwargs]
+     * @returns {{inputs_tensor: Tensor, model_inputs: Record<string, Tensor>, model_input_name: string}} The model-specific inputs for generation.
+     */
+    _prepare_model_inputs({ inputs, bos_token_id, model_kwargs }) {
+        const model_inputs = pick(model_kwargs, this.forward_params);
+        const input_name = this.main_input_name;
+        if (input_name in model_inputs) {
+            if (inputs) {
+                throw new Error(
+                    "`inputs`: {inputs}` were passed alongside {input_name} which is not allowed. " +
+                    "Make sure to either pass {inputs} or {input_name}=..."
+                );
             }
+        } else {
+            model_inputs[input_name] = inputs;
         }
 
-        // Update generation config with defaults
-        generation_config = this._get_generation_config(generation_config);
-
-        logits_processor = logits_processor ?? new LogitsProcessorList()
+        const inputs_tensor = model_inputs[input_name];
 
-        // Update logits processor
-        logits_processor = this._get_logits_processor(
-            generation_config,
-            input_ids_seq_length,
-            logits_processor
-        )
+        return { inputs_tensor, model_inputs, model_input_name: input_name };
+    }
 
-        /** @type {number[]} */
-        let eos_token_ids = generation_config.eos_token_id;
-        if (eos_token_ids !== null && !Array.isArray(eos_token_ids)) {
-            eos_token_ids = [eos_token_ids];
+    async _prepare_encoder_decoder_kwargs_for_generation({ inputs_tensor, model_inputs, model_input_name, generation_config }) {
+        if (
+            this.sessions['model'].inputNames.includes('inputs_embeds')
+            && !model_inputs.inputs_embeds
+            && '_prepare_inputs_embeds' in this
+        ) {
+            // Encoder expects `inputs_embeds` instead of `input_ids`
+            const { input_ids, pixel_values, attention_mask, ...kwargs } = model_inputs;
+            // @ts-ignore
+            const prepared_inputs = await this._prepare_inputs_embeds(model_inputs);
+            model_inputs = {
+                ...kwargs,
+                ...pick(prepared_inputs, ['inputs_embeds', 'attention_mask']),
+            };
         }
+        let { last_hidden_state } = await encoderForward(this, model_inputs);
 
-        // TODO implement early_stopping
-        // https://huggingface.co/blog/how-to-generate
+        // for classifier free guidance we need to add a 'null' input to our encoder hidden states
+        if (generation_config.guidance_scale !== null && generation_config.guidance_scale > 1) {
 
-        let numOutputTokens = 1;
-        const maxOutputTokens = numOutputTokens + (generation_config.max_new_tokens ?? Infinity);
+            last_hidden_state = cat([
+                last_hidden_state,
+                full_like(last_hidden_state, 0.0),
+            ], 0);
 
-        // Only use max length if max_new_tokens is not provided
-        const useMaxLength = Number.isInteger(generation_config.max_length) && (generation_config.max_new_tokens ?? null) === null;
-        let sampler = Sampler.getSampler(generation_config);
+            if ('attention_mask' in model_inputs) {
+                model_inputs['attention_mask'] = cat([
+                    model_inputs['attention_mask'],
+                    zeros_like(model_inputs['attention_mask']),
+                ], 0);
+            }
 
-        // @ts-ignore
-        let beams = this.getStartBeams(inputs, generation_config, numOutputTokens, inputs_attention_mask);
-
-        while (beams.some(x => !x.done) && numOutputTokens < maxOutputTokens) {
-            let newest_beams = [];
-            for (let beam of beams) {
-                if (beam.done) {
-                    // Add this beam back into the pool
-                    newest_beams.push(beam);
-                    continue
-                }
-                if (useMaxLength && beam.output_token_ids.length >= generation_config.max_length) {
-                    // Set this beam to done and add it back into the pool
-                    beam.done = true;
-                    newest_beams.push(beam);
-                    continue
+        } else if (model_inputs.decoder_input_ids) {
+            // Ensure that the encoder outputs have the same batch size as the decoder inputs,
+            // allowing for more efficient batched generation for single inputs
+            const decoder_input_ids_batch_size = toI64Tensor(model_inputs.decoder_input_ids).dims[0];
+            if (decoder_input_ids_batch_size !== last_hidden_state.dims[0]) {
+                if (last_hidden_state.dims[0] !== 1) {
+                    throw new Error(
+                        `The encoder outputs have a different batch size (${last_hidden_state.dims[0]}) than the decoder inputs (${decoder_input_ids_batch_size}).`
+                    )
                 }
+                last_hidden_state = cat(Array.from({ length: decoder_input_ids_batch_size }, () => last_hidden_state), 0);
+            }
+        }
+        model_inputs['encoder_outputs'] = last_hidden_state;
 
-                // @ts-ignore
-                let output = await this.runBeam(beam);
+        return model_inputs;
+    }
 
-                // add attentions/scores to beam only if user requested
-                if (generation_config.output_attentions) {
-                    this.addAttentionsToBeam(beam, output);
-                }
-                if (generation_config.output_scores) {
-                    // TODO add
+    /**
+     * Prepares `decoder_input_ids` for generation with encoder-decoder models
+     * @param {*} param0 
+     */
+    _prepare_decoder_input_ids_for_generation({ batch_size, model_input_name, model_kwargs, decoder_start_token_id, bos_token_id, generation_config }) {
+        let { decoder_input_ids, ...model_inputs } = model_kwargs;
+
+        // Prepare input ids if the user has not defined `decoder_input_ids` manually.
+        if (!decoder_input_ids) {
+            decoder_start_token_id ??= bos_token_id;
+
+            if (this.config.model_type === 'musicgen') {
+                // Custom logic (TODO: move to Musicgen class)
+                decoder_input_ids = Array.from({
+                    length: batch_size * this.config.decoder.num_codebooks
+                }, () => [decoder_start_token_id]);
+
+            } else if (Array.isArray(decoder_start_token_id)) {
+                if (decoder_start_token_id.length !== batch_size) {
+                    throw new Error(
+                        `\`decoder_start_token_id\` expcted to have length ${batch_size} but got ${decoder_start_token_id.length}`
+                    )
                 }
+                decoder_input_ids = decoder_start_token_id;
+            } else {
+                decoder_input_ids = Array.from({
+                    length: batch_size,
+                }, () => [decoder_start_token_id]);
+            }
+        } else if (!Array.isArray(decoder_input_ids[0])) {
+            // Correct batch size
+            decoder_input_ids = Array.from({
+                length: batch_size,
+            }, () => decoder_input_ids);
+        }
 
-                // Logits are of the form [batch_size, out_seq_length, vocab_size]
-                // In most cases, this will be [batch_size, 1, vocab_size]
-                // So, we select the last token's logits:
-                // (equivalent to `logits = outputs.logits[:, -1, :]`)
-                let logits = output.logits.slice(null, -1, null);
+        decoder_input_ids = toI64Tensor(decoder_input_ids);
+        model_kwargs['decoder_attention_mask'] = ones_like(decoder_input_ids);
 
-                // Apply logits processor
-                logits_processor(beam.output_token_ids, logits);
+        return { input_ids: decoder_input_ids, model_inputs };
+    }
 
-                let sampledTokens = sampler(logits);
-                for (let [newTokenId, logProb] of sampledTokens) {
-                    // use previous beam as a starting point
-                    let newBeam = { ...beam };
+    /**
+     * Generates sequences of token ids for models with a language modeling head.
+     * @param {import('./generation/parameters.js').GenerationFunctionParameters} options
+     * @returns {Promise<ModelOutput|Tensor>} The output of the model, which can contain the generated token ids, attentions, and scores.
+     */
+    async generate({
+        inputs = null,
+        generation_config = null,
+        logits_processor = null,
+        stopping_criteria = null,
+        streamer = null,
 
-                    // update new beam
-                    // @ts-ignore
-                    this.updateBeam(newBeam, newTokenId);
+        // inputs_attention_mask = null,
+        ...kwargs
+    }) {
+        this._validate_model_class();
 
-                    newBeam.score += logProb;
+        // Update generation config with defaults and kwargs
+        generation_config = this._prepare_generation_config(generation_config, kwargs);
 
-                    if (eos_token_ids && eos_token_ids.includes(newTokenId)) {
-                        newBeam.done = true;
-                    }
+        // 3. Define model inputs
+        let { inputs_tensor, model_inputs, model_input_name } = this._prepare_model_inputs({
+            inputs,
+            model_kwargs: kwargs,
+        });
 
-                    newest_beams.push(newBeam);
-                }
-            }
-            ++numOutputTokens;
+        const is_encoder_decoder = this.config.is_encoder_decoder;
 
-            // Next, we get the best beams, per ID
-            newest_beams = this.groupBeams(newest_beams).map(
-                group => group
-                    .sort((a, b) => b.score - a.score)      // sort by score
-                    .slice(0, generation_config.num_beams)  // remove outside beam width
-            );
+        // 4. Define other model kwargs
+        if (!is_encoder_decoder) {
+            // decoder-only models should use left-padding for generation
+        } else if (!('encoder_outputs' in model_inputs)) {
+            // if model is encoder decoder encoder_outputs are created
+            // and added to `model_kwargs`
+            model_inputs = await this._prepare_encoder_decoder_kwargs_for_generation(
+                { inputs_tensor, model_inputs, model_input_name, generation_config }
+            )
+        }
 
-            // Flatten beams
-            beams = newest_beams.flat();
+        // 5. Prepare `input_ids` which will be used for auto-regressive generation
+        // TODO: Update to align with HF transformers' implementation
+        let input_ids;
+        if (is_encoder_decoder) {
+            // Generating from the encoder outputs
+            ({ input_ids, model_inputs } = this._prepare_decoder_input_ids_for_generation({
+                batch_size: model_inputs[model_input_name].dims.at(0),
+                model_input_name,
+                model_kwargs: model_inputs,
+                decoder_start_token_id: generation_config.decoder_start_token_id,
+                bos_token_id: generation_config.bos_token_id,
+                generation_config,
+            }));
+        } else {
+            input_ids = model_inputs[model_input_name]
+        }
 
-            // Run callback
-            if (generation_config.callback_function) {
-                generation_config.callback_function(beams);
-            }
+        // 6. Prepare `max_length` depending on other stopping criteria.
+        let input_ids_length = input_ids.dims.at(-1);
+
+        if (generation_config.max_new_tokens !== null) {
+            generation_config.max_length = input_ids_length + generation_config.max_new_tokens;
         }
 
-        // TODO: Ensure that we can return non-batched outputs
+        // input_ids_length = model_inputs[model_input_name].dims.at(1);
+        // // inputs instanceof Tensor ?  : inputs.length;
 
-        const groupedBeams = this.groupBeams(beams);
+        // // decoder-only
+        // if (input_ids_length === 0) {
+        //     throw Error("Must supply a non-empty array of input token ids.")
+        // }
 
-        const getFlattened = (key) => groupedBeams.map(
-            batch => {
-                if (generation_config.num_return_sequences > 1) {
-                    return batch.slice(0, generation_config.num_return_sequences).map(x => x[key]);
-                } else {
-                    return [batch[0][key]];
-                }
-            }
-        ).flat(); // Flatten across batches (depth=1)
+        // let decoder_input_ids =
+        // generation_config.decoder_input_ids
+        // ?? generation_config.decoder_start_token_id
+        // ?? generation_config.bos_token_id
+        // ?? generation_config.eos_token_id;
 
-        const sequences = getFlattened('output_token_ids'); // [1, seqLength]
+        // Update logits processor
+        // 8. prepare distribution pre_processing samplers
+        const prepared_logits_processor = this._get_logits_processor(
+            generation_config,
+            input_ids_length,
+            logits_processor,
+        )
 
-        if (generation_config.return_dict_in_generate) {
-            // NOTE: `decoder_attentions` and `cross_attentions` should be:
-            //    list (one element for each generated token)
-            //    of list (one element for each layer of the decoder)
-            //    of torch.FloatTensor of shape (batch_size, num_heads, generated_length, sequence_length)
-            // However, since we are only generating one batch at a time, they are of the form:
-            //   list (batches)
-            //   of list (one element for each generated token)
-            //   of list (one element for each layer of the decoder)
-            //   of torch.FloatTensor of shape (1, num_heads, generated_length, sequence_length)
-            // 
-            // TODO: In future (when true parallelism, we should be able to return the correct shape)
-
-            const decoder_attentions = getFlattened('decoder_attentions');
-            const cross_attentions = getFlattened('cross_attentions');
+        // 9. prepare stopping criteria
+        const prepared_stopping_criteria = this._get_stopping_criteria(
+            generation_config, stopping_criteria
+        )
 
-            return {
-                sequences,
+        // /** @type {number[]} */
+        // let eos_token_ids = generation_config.eos_token_id;
+        // if (eos_token_ids !== null && !Array.isArray(eos_token_ids)) {
+        //     eos_token_ids = [eos_token_ids];
+        // }
 
-                decoder_attentions,
-                cross_attentions,
-            }
-        } else {
-            return sequences;
+        const numInputs = model_inputs[model_input_name].dims.at(0);
+
+        // TODO:
+        // done is a list of booleans to keep track of which inputs are done
+        // const done = new Array(numInputs).fill(false);
+        // For efficiency purposes, we remove completed rows from model_inputs
+        // when the beam is complete, and we keep track of the row index
+        // const rowIndexToBatchIndex = new Map();
+
+        const sampler = LogitsSampler.getSampler(generation_config);
+
+        // TODO make > numInputs
+        const scores = new Array(numInputs).fill(0);
+        /** @type {bigint[][]} */
+        const all_input_ids = input_ids.tolist();
+        if (streamer) {
+            streamer.put(all_input_ids);
         }
-    }
+        // const all_generated_input_ids = Array.from({ length: numInputs }, () => []);
+
+        // NOTE: For now, we don't support spawning new beams
+        // TODO: when we do, we simply copy past key values and accumulate into single large tensor
+
+        ////////////////////////////////////////////////////
+        // Generic search which handles 4 generation modes:
+        // - GenerationMode.GREEDY_SEARCH
+        // - GenerationMode.SAMPLE
+        // - GenerationMode.BEAM_SEARCH
+        // - GenerationMode.BEAM_SAMPLE
+        ////////////////////////////////////////////////////
+        let outputs;
+        let attentions = {};
+        while (true) {
+            // prepare model inputs
+            model_inputs = this.prepare_inputs_for_generation(all_input_ids, model_inputs, generation_config);
+            outputs = await this.forward(model_inputs);
+
+            if (generation_config.output_attentions && generation_config.return_dict_in_generate) {
+                // Get attentions if they are present
+                const token_attentions = this.getAttentions(outputs);
+                for (const key in token_attentions) {
+                    if (!(key in attentions)) {
+                        attentions[key] = [];
+                    }
+                    attentions[key].push(token_attentions[key]);
+                }
+            }
 
-    /**
-     * Helper function to add attentions to beam
-     * @param {Object} beam 
-     * @param {Object} output
-     * @private 
-     */
-    addAttentionsToBeam(beam, output) {
-        if (this.config.is_encoder_decoder) {
-            if (!output.cross_attentions || output.cross_attentions.length === 0) {
-                throw Error(
-                    "`output_attentions` is true, but the model did not produce cross-attentions. " +
-                    "This is most likely because the model was not exported with `output_attentions=True`."
-                )
+            // Logits are of the form [batch_size, out_seq_length, vocab_size]
+            // In most cases, this will be [batch_size, 1, vocab_size]
+            // So, we select the last token's logits:
+            // (equivalent to `logits = outputs.logits[:, -1, :]`)
+            const logits = outputs.logits.slice(null, -1, null);
+
+            const next_tokens_scores = prepared_logits_processor(all_input_ids, logits);
+
+            /** @type {[bigint][]} */
+            const generated_input_ids = [];
+            // const new_kv_cache = [];// NOTE: Only used for beam search when concatenating new kv
+            // Loop over each batch
+            for (let batch_idx = 0; batch_idx < next_tokens_scores.dims.at(0); ++batch_idx) {
+                const logs = next_tokens_scores[batch_idx];
+
+                const sampledTokens = await sampler(logs);
+                for (const [newTokenId, logProb] of sampledTokens) {
+                    const bigint = BigInt(newTokenId);
+                    // TODO: If branching, use previous beam as a starting point
+                    // update generated ids, model inputs, and length for next step
+                    scores[batch_idx] += logProb;
+                    all_input_ids[batch_idx].push(bigint);
+                    generated_input_ids.push([bigint]);
+
+                    // TODO: Support beam search
+                    break;
+                }
             }
-            if (!beam.cross_attentions) {
-                beam.cross_attentions = [];
+            if (streamer) {
+                streamer.put(generated_input_ids);
             }
-            beam.cross_attentions.push(output.cross_attentions);
-        }
 
-        if (!output.decoder_attentions || output.decoder_attentions.length === 0) {
-            throw Error(
-                "`output_attentions` is true, but the model did not produce decoder-attentions. " +
-                "This is most likely because the model was not exported with `output_attentions=True`."
-            )
+            const stop = prepared_stopping_criteria(all_input_ids);
+            if (stop.every(x => x)) {
+                break;
+            }
+
+            model_inputs = this._update_model_kwargs_for_generation({
+                generated_input_ids, outputs, model_inputs, is_encoder_decoder,
+            });
         }
-        if (!beam.decoder_attentions) {
-            beam.decoder_attentions = [];
+
+        if (streamer) {
+            streamer.end();
         }
-        beam.decoder_attentions.push(output.decoder_attentions);
-    }
 
-    /**
-     * Groups an array of beam objects by their ids.
-     *
-     * @param {Array} beams The array of beam objects to group.
-     * @returns {Array} An array of arrays, where each inner array contains beam objects with the same id.
-     */
-    groupBeams(beams) {
-        // Group beams by their ids
-        const groups = Object.create(null);
-        for (const obj of beams) {
-            if (groups[obj.id] === undefined) {
-                groups[obj.id] = [obj];
-            } else {
-                groups[obj.id].push(obj);
+        // Retrieve and dispose all final past key values (including encoder attentions)
+        const past_key_values = this.getPastKeyValues(outputs, model_inputs.past_key_values, true);
+
+        // TODO: ensure all_input_ids is padded correctly...
+        const sequences = new Tensor('int64', all_input_ids.flat(), [all_input_ids.length, all_input_ids[0].length]);
+
+        if (generation_config.return_dict_in_generate) {
+            return {
+                sequences,
+                past_key_values,
+                ...attentions,
+                // TODO:
+                // scores,
+                // logits,
             }
+        } else {
+            // Dispose all remaining tensors
+            for (const tensor of Object.values(outputs)) {
+                if (tensor.location === 'gpu-buffer') {
+                    tensor.dispose();
+                }
+            }
+            return sequences;
         }
-
-        return Object.values(groups);
     }
 
     /**
@@ -1241,47 +1591,55 @@ export class PreTrainedModel extends Callable {
      * @param {Object} pastKeyValues The previous past key values.
      * @returns {Object} An object containing past key values.
      */
-    getPastKeyValues(decoderResults, pastKeyValues) {
-
+    getPastKeyValues(decoderResults, pastKeyValues, disposeEncoderPKVs = false) {
         const pkvs = Object.create(null);
 
         for (const name in decoderResults) {
             if (name.startsWith('present')) {
-                let newName = name.replace('present', 'past_key_values');
-
-                if (pastKeyValues && name.includes('encoder')) {
-                    // Optimization introduced by optimum to reuse past key values. So, we just replace the constant
-                    // outputs with the previous past key values.
+                const newName = name.replace('present', 'past_key_values');
+                const is_encoder_pkv = name.includes('encoder');
+                if (is_encoder_pkv && pastKeyValues) {
+                    // Optimization introduced by optimum to reuse past key values.
+                    // So, we just replace the constant outputs (`decoderResults[name]`) with the previous past key values.
                     // https://github.com/huggingface/optimum/blob/0bf2c05fb7e1182b52d21b703cfc95fd9e4ea3dc/optimum/onnxruntime/base.py#L677-L704
                     pkvs[newName] = pastKeyValues[newName];
-                } else {
+                } else { // decoder or using first encoder PKVs
                     pkvs[newName] = decoderResults[name];
                 }
+
+                if (pastKeyValues && (!is_encoder_pkv || disposeEncoderPKVs)) {
+                    // - Always dispose decoder PKVs
+                    // - Only dispose encoder past key values when requested (after generation)
+                    const t = pastKeyValues[newName];
+                    if (t.location === 'gpu-buffer') {
+                        t.dispose();
+                    }
+                }
             }
         }
         return pkvs;
     }
 
     /**
-     * Returns an object containing attentions from the given decoder results object.
+     * Returns an object containing attentions from the given model output object.
      *
-     * @param {Object} decoderResults The decoder results object.
-     * @returns {Object} An object containing attentions.
+     * @param {Object} model_output The output of the model.
+     * @returns {{cross_attentions?: Tensor[]}} An object containing attentions.
      */
-    getAttentions(decoderResults) {
-        const attns = Object.create(null);
+    getAttentions(model_output) {
+        const attentions = {};
 
-        for (const attnName of ['cross_attentions', 'decoder_attentions']) {
-            const result = [];
-            for (const name in decoderResults) {
+        for (const attnName of ['cross_attentions', 'encoder_attentions', 'decoder_attentions']) {
+            for (const name in model_output) {
                 if (name.startsWith(attnName)) {
-                    const index = name.split('.').pop()
-                    result[index] = decoderResults[name];
+                    if (!(attnName in attentions)) {
+                        attentions[attnName] = [];
+                    }
+                    attentions[attnName].push(model_output[name]);
                 }
             }
-            attns[attnName] = result;
         }
-        return attns;
+        return attentions;
     }
 
     /**
@@ -1294,93 +1652,34 @@ export class PreTrainedModel extends Callable {
         if (pastKeyValues) {
             Object.assign(decoderFeeds, pastKeyValues)
         } else {
-            // TODO support batches (i.e., batch_size > 1)
-            const batch_size = 1;
+            const session = this.sessions['decoder_model_merged'] ?? this.sessions['model'];
+            const dtype = session?.config?.kv_cache_dtype ?? 'float32';
+            const empty = (dtype === 'float16') ? new Uint16Array() : [];
 
-            // @ts-ignore
-            if (this.config.is_encoder_decoder && (this.add_encoder_pkv ?? true)) {
-                // @ts-ignore
-                let encoder_dims = [batch_size, this.num_encoder_heads, 0, this.encoder_dim_kv];
-                // @ts-ignore
-                let decoder_dims = [batch_size, this.num_decoder_heads, 0, this.decoder_dim_kv];
-                // @ts-ignore
-                for (let i = 0; i < this.num_decoder_layers; ++i) {
-                    decoderFeeds[`past_key_values.${i}.encoder.key`] = new Tensor('float32', [], encoder_dims)
-                    decoderFeeds[`past_key_values.${i}.encoder.value`] = new Tensor('float32', [], encoder_dims)
-                    decoderFeeds[`past_key_values.${i}.decoder.key`] = new Tensor('float32', [], decoder_dims)
-                    decoderFeeds[`past_key_values.${i}.decoder.value`] = new Tensor('float32', [], decoder_dims)
-                }
-            } else if (this.config.model_type === 'falcon') {
-                // NOTE: Custom implementation for Falcon
-                // @ts-ignore
-                let dims = [batch_size * this.num_heads, 0, this.dim_kv]
-                // @ts-ignore
-                for (let i = 0; i < this.num_layers; ++i) {
-                    decoderFeeds[`past_key_values.${i}.key`] = new Tensor('float32', [], dims)
-                    decoderFeeds[`past_key_values.${i}.value`] = new Tensor('float32', [], dims)
-                }
-            } else if (this.config.multi_query) { // e.g., for `gpt_bigcode`
-                // @ts-ignore
-                let dims = [batch_size * this.num_heads, 0, 2 * this.dim_kv]
-                // @ts-ignore
-                for (let i = 0; i < this.num_layers; ++i) {
-                    decoderFeeds[`past_key_values.${i}.key_value`] = new Tensor('float32', [], dims)
-                }
-            } else if (this.config.model_type === 'bloom') {
-                // NOTE: Custom implementation for Bloom
-
-                // @ts-ignore
-                let keyDims = [batch_size * this.num_heads, this.dim_kv, 0] // [batch_size x num_heads,64,past_sequence_length]
-                // @ts-ignore
-                let valueDims = [batch_size * this.num_heads, 0, this.dim_kv] // [batch_size x num_heads,past_sequence_length,64]
-                // @ts-ignore
-                for (let i = 0; i < this.num_layers; ++i) {
-                    decoderFeeds[`past_key_values.${i}.key`] = new Tensor('float32', [], keyDims)
-                    decoderFeeds[`past_key_values.${i}.value`] = new Tensor('float32', [], valueDims)
-                }
-            } else { // Decoder-only
-                // @ts-ignore
-                let dims = [batch_size, this.num_heads, 0, this.dim_kv]
-                // @ts-ignore
-                for (let i = 0; i < this.num_layers; ++i) {
-                    decoderFeeds[`past_key_values.${i}.key`] = new Tensor('float32', [], dims)
-                    decoderFeeds[`past_key_values.${i}.value`] = new Tensor('float32', [], dims)
-                }
+            const shapes = getKeyValueShapes(this.config);
+
+            for (const name in shapes) {
+                decoderFeeds[name] = new Tensor(dtype, empty, shapes[name]);
             }
         }
     }
 
-    /**
-     * Initializes and returns the beam for text generation task
-     * @param {Tensor} inputTokenIds The input token ids.
-     * @param {Object} generation_config The generation config.
-     * @param {number} numOutputTokens The number of tokens to be generated.
-     * @param {Tensor} inputs_attention_mask Optional input attention mask.
-     * @returns {any} A Beam object representing the initialized beam.
-     * @private
-     */
-    getStartBeams(inputTokenIds, generation_config, numOutputTokens, inputs_attention_mask) {
-        return this._getStartBeams(this, inputTokenIds, generation_config, numOutputTokens, inputs_attention_mask)
-    }
-
-    /**
-     * Runs a single step of the beam search generation algorithm.
-     * @param {any} beam The current beam being generated.
-     * @returns {Promise<any>} The updated beam after a single generation step.
-     * @private
-     */
-    async runBeam(beam) {
-        return await this._runBeam(this, beam);
+    async encode_image({ pixel_values }) {
+        // image_inputs === { pixel_values }
+        const features = (await sessionRun(this.sessions['vision_encoder'], { pixel_values })).image_features;
+        if (!this.config.num_image_tokens) {
+            console.warn(
+                'The number of image tokens was not set in the model configuration. ' +
+                `Setting it to the number of features detected by the vision encoder (${features.dims[1]}).`
+            )
+            this.config.num_image_tokens = features.dims[1];
+        }
+        return features;
     }
 
-    /**
-     * Update a beam with a new token ID.
-     * @param {Object} beam The beam to update.
-     * @param {number} newTokenId The new token ID to add to the beam's output.
-     * @private
-     */
-    updateBeam(beam, newTokenId) {
-        return this._updateBeam(beam, newTokenId);
+    async encode_text({ input_ids }) {
+        // text_inputs === { input_ids, attention_mask }
+        return (await sessionRun(this.sessions['embed_tokens'], { input_ids })).inputs_embeds;
     }
 }
 
@@ -2238,36 +2537,23 @@ export class AlbertForMaskedLM extends AlbertPreTrainedModel {
 
 //////////////////////////////////////////////////
 // T5 models
-export class T5PreTrainedModel extends PreTrainedModel { };
+export class T5PreTrainedModel extends PreTrainedModel {
+    forward_params = [
+        'input_ids',
+        'attention_mask',
+        'encoder_outputs',
+        'decoder_input_ids',
+        'decoder_attention_mask',
+        'past_key_values',
+    ];
+};
 
 export class T5Model extends T5PreTrainedModel { }
 
 /**
  * T5Model is a class representing a T5 model for conditional generation.
  */
-export class T5ForConditionalGeneration extends T5PreTrainedModel {
-
-    /**
-     * Creates a new instance of the `T5ForConditionalGeneration` class.
-     * @param {Object} config The model configuration.
-     * @param {any} session session for the model.
-     * @param {any} decoder_merged_session session for the decoder.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
-
-        this.num_decoder_layers = this.config.num_decoder_layers;
-        this.num_decoder_heads = this.config.num_heads;
-        this.decoder_dim_kv = this.config.d_kv;
-
-        this.num_encoder_layers = this.config.num_layers;
-        this.num_encoder_heads = this.config.num_heads;
-        this.encoder_dim_kv = this.config.d_kv;
-    }
-}
+export class T5ForConditionalGeneration extends T5PreTrainedModel { }
 //////////////////////////////////////////////////
 
 
@@ -2286,28 +2572,7 @@ export class LongT5Model extends LongT5PreTrainedModel { }
 /**
  * LONGT5 Model with a `language modeling` head on top.
  */
-export class LongT5ForConditionalGeneration extends LongT5PreTrainedModel {
-    /**
-     * Creates a new instance of the `LongT5ForConditionalGeneration` class.
-     * @param {Object} config The model configuration.
-     * @param {any} session session for the model.
-     * @param {any} decoder_merged_session session for the decoder.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
-
-        this.num_decoder_layers = this.config.num_decoder_layers;
-        this.num_decoder_heads = this.config.num_heads;
-        this.decoder_dim_kv = this.config.d_kv;
-
-        this.num_encoder_layers = this.config.num_layers;
-        this.num_encoder_heads = this.config.num_heads;
-        this.encoder_dim_kv = this.config.d_kv;
-    }
-}
+export class LongT5ForConditionalGeneration extends LongT5PreTrainedModel { }
 //////////////////////////////////////////////////
 
 
@@ -2320,29 +2585,7 @@ export class MT5Model extends MT5PreTrainedModel { }
 /**
  * A class representing a conditional sequence-to-sequence model based on the MT5 architecture.
  */
-export class MT5ForConditionalGeneration extends MT5PreTrainedModel {
-
-    /**
-     * Creates a new instance of the `MT5ForConditionalGeneration` class.
-     * @param {any} config The model configuration.
-     * @param {any} session The ONNX session containing the encoder weights.
-     * @param {any} decoder_merged_session The ONNX session containing the merged decoder weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
-
-        this.num_decoder_layers = this.config.num_decoder_layers;
-        this.num_decoder_heads = this.config.num_heads;
-        this.decoder_dim_kv = this.config.d_kv;
-
-        this.num_encoder_layers = this.config.num_layers;
-        this.num_encoder_heads = this.config.num_heads;
-        this.encoder_dim_kv = this.config.d_kv;
-    }
-}
+export class MT5ForConditionalGeneration extends MT5PreTrainedModel { }
 //////////////////////////////////////////////////
 
 //////////////////////////////////////////////////
@@ -2357,30 +2600,7 @@ export class BartModel extends BartPretrainedModel { }
 /**
  * The BART Model with a language modeling head. Can be used for summarization.
  */
-export class BartForConditionalGeneration extends BartPretrainedModel {
-
-    /**
-     * Creates a new instance of the `BartForConditionalGeneration` class.
-     * @param {Object} config The configuration object for the Bart model.
-     * @param {Object} session The ONNX session used to execute the model.
-     * @param {Object} decoder_merged_session The ONNX session used to execute the decoder.
-     * @param {Object} generation_config The generation configuration object.
-     */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
-
-        this.num_decoder_layers = this.config.decoder_layers;
-        this.num_decoder_heads = this.config.decoder_attention_heads;
-        this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
-
-        this.num_encoder_layers = this.config.encoder_layers;
-        this.num_encoder_heads = this.config.encoder_attention_heads;
-        this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
-    }
-
-}
+export class BartForConditionalGeneration extends BartPretrainedModel { }
 
 /**
  * Bart model with a sequence classification/head on top (a linear layer on top of the pooled output)
@@ -2411,30 +2631,7 @@ export class MBartModel extends MBartPreTrainedModel { }
 /**
  * The MBART Model with a language modeling head. Can be used for summarization, after fine-tuning the pretrained models.
  */
-export class MBartForConditionalGeneration extends MBartPreTrainedModel {
-
-    /**
-     * Creates a new instance of the `MBartForConditionalGeneration` class.
-     * @param {Object} config The configuration object for the Bart model.
-     * @param {Object} session The ONNX session used to execute the model.
-     * @param {Object} decoder_merged_session The ONNX session used to execute the decoder.
-     * @param {Object} generation_config The generation configuration object.
-     */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
-
-        this.num_decoder_layers = this.config.decoder_layers;
-        this.num_decoder_heads = this.config.decoder_attention_heads;
-        this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
-
-        this.num_encoder_layers = this.config.encoder_layers;
-        this.num_encoder_heads = this.config.encoder_attention_heads;
-        this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
-    }
-
-}
+export class MBartForConditionalGeneration extends MBartPreTrainedModel { }
 
 /**
  * MBart model with a sequence classification/head on top (a linear layer on top of the pooled output).
@@ -2452,26 +2649,7 @@ export class MBartForSequenceClassification extends MBartPreTrainedModel {
 }
 
 
-export class MBartForCausalLM extends MBartPreTrainedModel {
-    /**
-     * Creates a new instance of the `MBartForCausalLM` class.
-     * @param {Object} config Configuration object for the model.
-     * @param {Object} decoder_merged_session ONNX Session object for the decoder.
-     * @param {Object} generation_config Configuration object for the generation process.
-     */
-    constructor(config, decoder_merged_session, generation_config) {
-        super(config, decoder_merged_session);
-        this.generation_config = generation_config;
-
-        this.num_decoder_layers = this.config.decoder_layers;
-        this.num_decoder_heads = this.config.decoder_attention_heads;
-        this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
-
-        this.num_encoder_layers = this.config.encoder_layers;
-        this.num_encoder_heads = this.config.encoder_attention_heads;
-        this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
-    }
-}
+export class MBartForCausalLM extends MBartPreTrainedModel { }
 //////////////////////////////////////////////////
 
 
@@ -2487,29 +2665,7 @@ export class BlenderbotModel extends BlenderbotPreTrainedModel { }
 /**
  * The Blenderbot Model with a language modeling head. Can be used for summarization.
  */
-export class BlenderbotForConditionalGeneration extends BlenderbotPreTrainedModel {
-
-    /**
-     * Creates a new instance of the `BlenderbotForConditionalGeneration` class.
-     * @param {any} config The model configuration.
-     * @param {any} session The ONNX session containing the encoder weights.
-     * @param {any} decoder_merged_session The ONNX session containing the merged decoder weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
-
-        this.num_decoder_layers = this.config.decoder_layers;
-        this.num_decoder_heads = this.config.decoder_attention_heads;
-        this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
-
-        this.num_encoder_layers = this.config.encoder_layers;
-        this.num_encoder_heads = this.config.encoder_attention_heads;
-        this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
-    }
-}
+export class BlenderbotForConditionalGeneration extends BlenderbotPreTrainedModel { }
 //////////////////////////////////////////////////
 
 
@@ -2525,29 +2681,7 @@ export class BlenderbotSmallModel extends BlenderbotSmallPreTrainedModel { }
 /**
  * The BlenderbotSmall Model with a language modeling head. Can be used for summarization.
  */
-export class BlenderbotSmallForConditionalGeneration extends BlenderbotSmallPreTrainedModel {
-
-    /**
-     * Creates a new instance of the `BlenderbotForConditionalGeneration` class.
-     * @param {any} config The model configuration.
-     * @param {any} session The ONNX session containing the encoder weights.
-     * @param {any} decoder_merged_session The ONNX session containing the merged decoder weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
-
-        this.num_decoder_layers = this.config.decoder_layers;
-        this.num_decoder_heads = this.config.decoder_attention_heads;
-        this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
-
-        this.num_encoder_layers = this.config.encoder_layers;
-        this.num_encoder_heads = this.config.encoder_attention_heads;
-        this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
-    }
-}
+export class BlenderbotSmallForConditionalGeneration extends BlenderbotSmallPreTrainedModel { }
 //////////////////////////////////////////////////
 
 
@@ -2775,119 +2909,169 @@ export class ASTForAudioClassification extends ASTPreTrainedModel { }
 
 //////////////////////////////////////////////////
 // Whisper models
-export class WhisperPreTrainedModel extends PreTrainedModel { };
+export class WhisperPreTrainedModel extends PreTrainedModel {
+
+    requires_attention_mask = false;
+    main_input_name = 'input_features';
+    forward_params = [
+        'input_features',
+        'attention_mask',
+        'decoder_input_ids',
+        'decoder_attention_mask',
+        'past_key_values',
+    ];
+};
 
 /**
  * WhisperModel class for training Whisper models without a language model head.
  */
 export class WhisperModel extends WhisperPreTrainedModel { }
 
+
 /**
  * WhisperForConditionalGeneration class for generating conditional outputs from Whisper models.
  */
 export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
 
-    requires_attention_mask = false;
-    main_input_name = 'input_features';
+    _prepare_generation_config(generation_config, kwargs) {
+        return /** @type {WhisperGenerationConfig} */ (super._prepare_generation_config(generation_config, kwargs, WhisperGenerationConfig));
+    }
 
     /**
-     * Creates a new instance of the `WhisperForConditionalGeneration` class.
-     * @param {Object} config Configuration object for the model.
-     * @param {Object} session ONNX Session object for the model.
-     * @param {Object} decoder_merged_session ONNX Session object for the decoder.
-     * @param {Object} generation_config Configuration object for the generation process.
-     */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
+     * 
+     * @param {WhisperGenerationConfig} generation_config 
+     */
+    _retrieve_init_tokens(generation_config) {
+        // prefix tokens are of the form: 
+        //  - Multilingual: <|startoftranscript|> <|lang_id|> <|task|> [<|notimestamps|>]
+        //  - English-only: <|startoftranscript|> [<|notimestamps|>]
+
+        // 1. Handle <|startoftranscript|> token
+        const init_tokens = [generation_config.decoder_start_token_id];
+
+        // 2. Handle <|lang_id|> and <|task> tokens
+        let language = generation_config.language;
+        const task = generation_config.task;
+        if (generation_config.is_multilingual) {
+            if (!language) {
+                // TODO: Implement language detection
+                console.warn('No language specified - defaulting to English (en).');
+                language = 'en';
+            }
 
-        this.num_decoder_layers = this.config.decoder_layers;
-        this.num_decoder_heads = this.config.decoder_attention_heads;
-        this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
+            // Add language token
+            const language_code = whisper_language_to_code(language);
+            const language_token = `<|${language_code}|>`;
+            init_tokens.push(generation_config.lang_to_id[language_token])
 
-        this.num_encoder_layers = this.config.encoder_layers;
-        this.num_encoder_heads = this.config.encoder_attention_heads;
-        this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
-    }
+            // Add task token
+            // NOTE: Defaults to 'transcribe' if no task is specified
+            init_tokens.push(generation_config.task_to_id[task ?? 'transcribe']);
 
-    /**
-     * @typedef {Object} WhisperGenerationConfig
-     * @extends GenerationConfig
-     * @property {boolean} [return_timestamps=null] Whether to return the timestamps with the text. This enables the `WhisperTimestampsLogitsProcessor`.
-     * @property {boolean} [return_token_timestamps=null] Whether to return token-level timestamps
-     * with the text. This can be used with or without the `return_timestamps` option. To get word-level
-     * timestamps, use the tokenizer to group the tokens into words.
-     * @property {number} [num_frames=null]  The number of audio frames available in this chunk. This is only used generating word-level timestamps.
-     */
+        } else if (language || task) {
+            throw new Error(
+                "Cannot specify `task` or `language` for an English-only model. If the model is intended to be multilingual, pass `is_multilingual=true` to generate, or update the generation config."
+            )
+        }
+
+        // 3. Handle <|notimestamps|> token
+        if (
+            !generation_config.return_timestamps
+            && generation_config.no_timestamps_token_id
+            && init_tokens.at(-1) !== generation_config.no_timestamps_token_id
+        ) {
+            init_tokens.push(generation_config.no_timestamps_token_id);
+        } else if (
+            generation_config.return_timestamps
+            &&
+            init_tokens.at(-1) === generation_config.no_timestamps_token_id
+        ) {
+            console.warn("<|notimestamps|> prompt token is removed from generation_config since `return_timestamps` is set to `true`.");
+            init_tokens.pop();
+        }
+
+        // let's make sure we don't pass `null` tokens as prompt tokens
+        return init_tokens.filter(token => token != null);
+    }
 
     /**
-     * Generates outputs based on input and generation configuration.
-     * @param {Object} inputs Input data for the model.
-     * @param {WhisperGenerationConfig} generation_config Configuration object for the generation process.
-     * @param {Object} logits_processor Optional logits processor object.
-     * @returns {Promise<Object>} Promise object represents the generated outputs.
+     * Transcribes or translates log-mel input features to a sequence of auto-regressively generated token ids.
+     * @param {import('./models/whisper/generation_whisper.js').WhisperGenerationFunctionParameters} options
+     * @returns {Promise<ModelOutput|Tensor>} The output of the model, which can contain the generated token ids, attentions, and scores.
      */
-    async generate(
-        inputs,
+    async generate({
+        inputs = null,
         generation_config = null,
         logits_processor = null,
-        // {
-        //     return_timestamps = null,
-        //     return_token_timestamps = null,
-        //     language = null,
-        //     task = null,
-        // } = {},
-    ) {
-        // Create generation config object
-        generation_config = this._get_generation_config(generation_config);
+        stopping_criteria = null,
 
+        // Whisper-specific options (passed to kwargs)
+        // prompt_ids = null,
+        // language = null,
+        // task = null,
 
-        // Whisper has additional options for returning timestamps
-        generation_config.return_timestamps ??= false;
+        ...kwargs
+    }) {
+        generation_config = this._prepare_generation_config(generation_config, kwargs);
 
-        // TODO add language and task
+        const init_tokens = kwargs.decoder_input_ids ?? this._retrieve_init_tokens(generation_config);
 
         if (generation_config.return_timestamps) {
-            logits_processor = [new WhisperTimeStampLogitsProcessor(generation_config)]
+            logits_processor ??= new LogitsProcessorList();
+            logits_processor.push(
+                new WhisperTimeStampLogitsProcessor(generation_config, init_tokens)
+            );
         }
 
-        if (generation_config.return_token_timestamps) {
-            generation_config.output_attentions = true;
-            generation_config.return_dict_in_generate = true;
-
-            if (generation_config.task === 'translate') {
-                console.warn("Token-level timestamps may not be reliable for task 'translate'.")
-            }
+        if (generation_config.begin_suppress_tokens) {
+            logits_processor ??= new LogitsProcessorList();
+            logits_processor.push(
+                new SuppressTokensAtBeginLogitsProcessor(generation_config.begin_suppress_tokens, init_tokens.length)
+            );
+        }
 
+        if (generation_config.return_token_timestamps) {
             if (!generation_config.alignment_heads) {
                 throw new Error(
                     "Model generation config has no `alignment_heads`, token-level timestamps not available. " +
                     "See https://gist.github.com/hollance/42e32852f24243b748ae6bc1f985b13a on how to add this property to the generation config."
                 )
             }
+
+            if (generation_config.task === 'translate') {
+                console.warn("Token-level timestamps may not be reliable for task 'translate'.")
+            }
+
+            generation_config.output_attentions = true;
+            generation_config.return_dict_in_generate = true;
         }
 
-        const outputs = await super.generate(inputs, generation_config, logits_processor);
+        const outputs = await super.generate({
+            inputs,
+            generation_config,
+            logits_processor,
+            decoder_input_ids: init_tokens,
+            ...kwargs
+        });
 
-        if (generation_config.return_token_timestamps && generation_config.alignment_heads) {
+        if (generation_config.return_token_timestamps) {
             outputs["token_timestamps"] = this._extract_token_timestamps(
                 outputs,
                 generation_config.alignment_heads,
                 generation_config.num_frames,
-            )
+            );
         }
 
-        return outputs
+        return outputs;
     }
 
     /**
      * Calculates token-level timestamps using the encoder-decoder cross-attentions and
      * dynamic time-warping (DTW) to map each output token to a position in the input audio.
+     * If `num_frames` is specified, the encoder-decoder cross-attentions will be cropped before applying DTW.
      * @param {Object} generate_outputs Outputs generated by the model
-     * @param {Tensor[][][]} generate_outputs.cross_attentions The cross attentions output by the model
-     * @param {Tensor[][][]} generate_outputs.decoder_attentions The decoder attentions output by the model
-     * @param {number[][]} generate_outputs.sequences The sequences output by the model
+     * @param {Tensor[][]} generate_outputs.cross_attentions The cross attentions output by the model
+     * @param {Tensor} generate_outputs.sequences The sequences output by the model
      * @param {number[][]} alignment_heads Alignment heads of the model
      * @param {number} [num_frames=null] Number of frames in the input audio.
      * @param {number} [time_precision=0.02] Precision of the timestamps in seconds
@@ -2900,6 +3084,12 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
                 "This is most likely because the model was not exported with `output_attentions=True`."
             )
         }
+        if (num_frames == null) {
+            console.warn(
+                "`num_frames` has not been set, meaning the entire audio will be analyzed. " +
+                "This may lead to inaccurate token-level timestamps for short audios (< 30 seconds)."
+            );
+        }
 
         let median_filter_width = this.config.median_filter_width;
         if (median_filter_width === undefined) {
@@ -2907,53 +3097,55 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
             median_filter_width = 7;
         }
 
-        const batchedMatrices = generate_outputs.cross_attentions.map(batch => {
-            // Create a list with `decoder_layers` elements, each a tensor of shape
-            // (batch size, attention_heads, output length, input length).
-            let cross_attentions = Array.from({ length: this.config.decoder_layers },
-                (_, i) => cat(batch.map(x => x[i]), 2)
-            );
-
-            let weights = stack(alignment_heads.map(([l, h]) => {
-                return num_frames
-                    ? cross_attentions[l].slice(null, h, null, [0, num_frames])
-                    : cross_attentions[l].slice(null, h);
-            }));
-            weights = weights.transpose(1, 0, 2, 3)
+        // TODO: Improve batch processing
+        const batch = generate_outputs.cross_attentions;
+        // Create a list with `decoder_layers` elements, each a tensor of shape
+        // (batch size, attention_heads, output length, input length).
+        const cross_attentions = Array.from({ length: this.config.decoder_layers },
+            // Concatenate the cross attentions for each layer across sequence length dimension.
+            (_, i) => cat(batch.map(x => x[i]), 2)
+        );
 
-            let [std, calculatedMean] = std_mean(weights, -2, 0, true);
+        const weights = stack(alignment_heads.map(([l, h]) => {
+            if (l >= cross_attentions.length) {
+                throw new Error(`Layer index ${l} is out of bounds for cross attentions (length ${cross_attentions.length}).`)
+            }
+            return num_frames
+                ? cross_attentions[l].slice(null, h, null, [0, num_frames])
+                : cross_attentions[l].slice(null, h);
+        })).transpose(1, 0, 2, 3);
 
-            // Normalize and smoothen the weights.
-            let smoothedWeights = weights.clone(); // [1, 8, seqLength, 1500]
+        const [std, calculatedMean] = std_mean(weights, -2, 0, true);
 
-            for (let a = 0; a < smoothedWeights.dims[0]; ++a) {
-                let aTensor = smoothedWeights[a]; // [8, seqLength, 1500]
+        // Normalize and smoothen the weights.
+        const smoothedWeights = weights.clone(); // [1, 8, seqLength, 1500]
 
-                for (let b = 0; b < aTensor.dims[0]; ++b) {
-                    let bTensor = aTensor[b]; // [seqLength, 1500]
+        for (let a = 0; a < smoothedWeights.dims[0]; ++a) {
+            const aTensor = smoothedWeights[a]; // [8, seqLength, 1500]
 
-                    const stdTensor = std[a][b][0]; // [1500]
-                    const meanTensor = calculatedMean[a][b][0]; // [1500]
+            for (let b = 0; b < aTensor.dims[0]; ++b) {
+                const bTensor = aTensor[b]; // [seqLength, 1500]
 
-                    for (let c = 0; c < bTensor.dims[0]; ++c) {
+                const stdTensorData = std[a][b][0].data; // [1500]
+                const meanTensorData = calculatedMean[a][b][0].data; // [1500]
 
-                        let cTensor = bTensor[c]; // [1500]
-                        for (let d = 0; d < cTensor.data.length; ++d) {
-                            cTensor.data[d] = (cTensor.data[d] - meanTensor.data[d]) / stdTensor.data[d]
-                        }
+                for (let c = 0; c < bTensor.dims[0]; ++c) {
 
-                        // Apply median filter.
-                        cTensor.data.set(medianFilter(cTensor.data, median_filter_width))
+                    let cTensorData = bTensor[c].data; // [1500]
+                    for (let d = 0; d < cTensorData.length; ++d) {
+                        cTensorData[d] = (cTensorData[d] - meanTensorData[d]) / stdTensorData[d]
                     }
+
+                    // Apply median filter.
+                    cTensorData.set(medianFilter(cTensorData, median_filter_width))
                 }
             }
+        }
 
-            // Average the different cross-attention heads.
-            const matrix = mean(smoothedWeights, 1);
-            return matrix;
-        });
+        // Average the different cross-attention heads.
+        const batchedMatrices = [mean(smoothedWeights, 1)];
 
-        const timestampsShape = [generate_outputs.sequences.length, generate_outputs.sequences[0].length];
+        const timestampsShape = generate_outputs.sequences.dims;
 
         const timestamps = new Tensor(
             'float32',
@@ -2966,16 +3158,16 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
             // NOTE: Since we run only one batch at a time, we can squeeze to get the same dimensions
             // as the python implementation
             const matrix = batchedMatrices[batch_idx].neg().squeeze_(0);
-            let [text_indices, time_indices] = dynamicTimeWarping(matrix);
+            const [text_indices, time_indices] = dynamic_time_warping(matrix.tolist());
 
-            let diffs = Array.from({ length: text_indices.length - 1 }, (v, i) => text_indices[i + 1] - text_indices[i]);
-            let jumps = mergeArrays([1], diffs).map(x => !!x); // convert to boolean
+            const diffs = Array.from({ length: text_indices.length - 1 }, (v, i) => text_indices[i + 1] - text_indices[i]);
+            const jumps = mergeArrays([1], diffs).map(x => !!x); // convert to boolean
 
-            let jump_times = [];
+            const jump_times = [];
             for (let i = 0; i < jumps.length; ++i) {
                 if (jumps[i]) {
-                    jump_times.push(time_indices[i] * time_precision);
                     // NOTE: No point in rounding here, since we set to Float32Array later
+                    jump_times.push(time_indices[i] * time_precision);
                 }
             }
             timestamps[batch_idx].data.set(jump_times, 1)
@@ -2992,66 +3184,203 @@ export class WhisperForConditionalGeneration extends WhisperPreTrainedModel {
  */
 export class VisionEncoderDecoderModel extends PreTrainedModel {
     main_input_name = 'pixel_values';
+    forward_params = [
+        'pixel_values',
+        'input_ids',
+        'encoder_hidden_states',
+        'past_key_values',
+    ];
+}
+//////////////////////////////////////////////////
 
-    /**
-     * Creates a new instance of the `VisionEncoderDecoderModel` class.
-     * @param {Object} config The configuration object specifying the hyperparameters and other model settings.
-     * @param {Object} session The ONNX session containing the encoder model.
-     * @param {any} decoder_merged_session The ONNX session containing the merged decoder model.
-     * @param {Object} generation_config Configuration object for the generation process.
-     */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
-
-        // Extract configs
-        const encoderConfig = this.config.encoder;
-        const decoderConfig = this.config.decoder;
-
-        // Validate encoder
-        const encoderModelType = encoderConfig.model_type;
-        const encoderModel =
-            MODEL_MAPPING_NAMES_ENCODER_ONLY.get(encoderModelType)
-            ?? MODEL_MAPPING_NAMES_ENCODER_DECODER.get(encoderModelType);
-        if (!encoderModel) {
-            console.warn(`Model type for encoder '${encoderModelType}' not found, assuming encoder-only architecture. Please report this at https://github.com/xenova/transformers.js/issues/new/choose.`);
-        }
 
-        // Validate decoder
-        const decoderModel = MODEL_WITH_LM_HEAD_MAPPING_NAMES.get(decoderConfig.model_type);
-        if (!decoderModel) {
-            throw new Error(`Unable to construct \`VisionEncoderDecoder\` due to unsupported decoder: "${this.config.decoder.model_type}"`);
+//////////////////////////////////////////////////
+// LLaVa Models
+export class LlavaPreTrainedModel extends PreTrainedModel {
+    forward_params = [
+        'input_ids',
+        'pixel_values',
+        'attention_mask',
+        'position_ids',
+        'past_key_values',
+    ];
+}
+
+/**
+ * The LLAVA model which consists of a vision backbone and a language model.
+ */
+export class LlavaForConditionalGeneration extends LlavaPreTrainedModel {
+
+    _merge_input_ids_with_image_features({
+        inputs_embeds,
+        image_features,
+        input_ids,
+        attention_mask,
+    }) {
+
+        const image_token_index = this.config.image_token_index;
+
+        const idsList = input_ids.tolist();
+
+        // NOTE: we use .findIndex instead of .indexOf to perform weak comparison (==) between BigInt and Number
+        const indexOfImage = idsList.map(x => x.findIndex(x => x == image_token_index));
+
+        const noImages = indexOfImage.every(x => x === -1);
+        const allImages = indexOfImage.every(x => x !== -1);
+        if (!noImages && !allImages) {
+            // Check for padding reasons
+            throw new Error('Every input should contain either 0 or 1 image token.');
         }
 
-        // @ts-ignore
-        const decoderModelClass = decoderModel[1];
-        // @ts-ignore
-        const decoder = new decoderModelClass(decoderConfig, decoder_merged_session, generation_config);
+        if (noImages) {
+            return {
+                inputs_embeds,
+                attention_mask,
+            }
+        }
 
-        this.add_encoder_pkv = 'num_decoder_layers' in decoder;
-        if (this.add_encoder_pkv) {
-            // Decoder is part of an encoder-decoder model
-            this.num_decoder_layers = decoder.num_decoder_layers;
-            this.num_decoder_heads = decoder.num_decoder_heads;
-            this.decoder_dim_kv = decoder.decoder_dim_kv;
+        const stacked = [];
+        const stacked_attention_mask = [];
+        for (let i = 0; i < indexOfImage.length; ++i) {
+            const index = indexOfImage[i];
+
+            const e = inputs_embeds[i];
+            const im = image_features[i];
+            const am = attention_mask[i];
+            stacked.push(
+                cat([
+                    e.slice([0, index]),
+                    im,
+                    e.slice([index + 1, e.dims[0]]),
+                ], 0)
+            );
 
-            this.num_encoder_layers = decoder.num_encoder_layers;
-            this.num_encoder_heads = decoder.num_encoder_heads;
-            this.encoder_dim_kv = decoder.encoder_dim_kv;
+            stacked_attention_mask.push(
+                cat([
+                    am.slice([0, index]),
+                    ones([im.dims[0]]),
+                    am.slice([index + 1, am.dims[0]])
+                ], 0)
+            )
+        }
 
-        } else {
-            // Decoder is a decoder-only model
-            this.num_layers = decoder.num_layers;
-            this.num_heads = decoder.num_heads;
-            this.dim_kv = decoder.dim_kv;
+        return {
+            inputs_embeds: stack(stacked, 0),
+            attention_mask: stack(stacked_attention_mask, 0),
         }
     }
 }
 //////////////////////////////////////////////////
 
-//////////////////////////////////////////////////
-// CLIP models
+export class Moondream1ForConditionalGeneration extends LlavaForConditionalGeneration { } // NOTE: extends LlavaForConditionalGeneration
+
+export class Florence2PreTrainedModel extends PreTrainedModel {
+    forward_params = [
+        // Encoder inputs
+        'input_ids',
+        'inputs_embeds',
+        'attention_mask',
+        'pixel_values',
+
+        // Decoder inputs
+        'encoder_outputs',
+        'decoder_input_ids',
+        'decoder_inputs_embeds',
+        'decoder_attention_mask',
+        'past_key_values',
+    ];
+    main_input_name = 'inputs_embeds';
+}
+
+export class Florence2ForConditionalGeneration extends Florence2PreTrainedModel {
+
+    _merge_input_ids_with_image_features({
+        inputs_embeds,
+        image_features,
+        input_ids,
+        attention_mask,
+    }) {
+        return {
+            inputs_embeds: cat([
+                image_features, // image embeds
+                inputs_embeds, // task prefix embeds
+            ], 1),
+            attention_mask: cat([
+                ones(image_features.dims.slice(0, 2)), // image attention mask
+                attention_mask, // task prefix attention mask
+            ], 1),
+        }
+    }
+
+    async _prepare_inputs_embeds({ input_ids, pixel_values, inputs_embeds, attention_mask }) {
+        if (!input_ids && !pixel_values) {
+            throw new Error('Either `input_ids` or `pixel_values` should be provided.');
+        }
+
+        // 1. Possibly, extract the input embeddings
+        let text_features, image_features;
+        if (input_ids) {
+            text_features = await this.encode_text({ input_ids });
+        }
+        if (pixel_values) {
+            image_features = await this.encode_image({ pixel_values });
+        }
+
+        // 2. Possibly, merge text and images
+        if (text_features && image_features) {
+            ({ inputs_embeds, attention_mask } = this._merge_input_ids_with_image_features({
+                inputs_embeds: text_features,
+                image_features,
+                input_ids,
+                attention_mask,
+            }));
+        } else {
+            inputs_embeds = text_features || image_features;
+        }
+
+        return { inputs_embeds, attention_mask };
+    }
+
+    async forward({
+        input_ids,
+        pixel_values,
+        attention_mask,
+        decoder_input_ids,
+        decoder_attention_mask,
+        encoder_outputs,
+        past_key_values,
+
+        inputs_embeds,
+        decoder_inputs_embeds,
+    }) {
+        if (!inputs_embeds) {
+            ({ inputs_embeds, attention_mask } = await this._prepare_inputs_embeds({ input_ids, pixel_values, inputs_embeds, attention_mask }));
+        }
+
+        if (!encoder_outputs) {
+            // Must compute encoder outputs
+            let { last_hidden_state } = await encoderForward(this, { inputs_embeds, attention_mask });
+            encoder_outputs = last_hidden_state;
+        }
+
+        if (!decoder_inputs_embeds) {
+            if (!decoder_input_ids) {
+                throw new Error('Either `decoder_input_ids` or `decoder_inputs_embeds` should be provided.');
+            }
+            decoder_inputs_embeds = await this.encode_text({ input_ids: decoder_input_ids });
+        }
+
+        const decoderFeeds = {
+            inputs_embeds: decoder_inputs_embeds,
+            attention_mask: decoder_attention_mask,
+            encoder_attention_mask: attention_mask,
+            encoder_hidden_states: encoder_outputs,
+            past_key_values,
+        };
+        const decoder_outputs = await decoderForward(this, decoderFeeds, true);
+        return decoder_outputs;
+    }
+}
 export class CLIPPreTrainedModel extends PreTrainedModel { }
 
 /**
@@ -3060,7 +3389,7 @@ export class CLIPPreTrainedModel extends PreTrainedModel { }
  * **Example:** Perform zero-shot image classification with a `CLIPModel`.
  * 
  * ```javascript
- * import { AutoTokenizer, AutoProcessor, CLIPModel, RawImage } from '@xenova/transformers';
+ * import { AutoTokenizer, AutoProcessor, CLIPModel, RawImage } from '@huggingface/transformers';
  * 
  * // Load tokenizer, processor, and model
  * let tokenizer = await AutoTokenizer.from_pretrained('Xenova/clip-vit-base-patch16');
@@ -3117,7 +3446,7 @@ export class CLIPTextModel extends CLIPPreTrainedModel {
  * **Example:** Compute text embeddings with `CLIPTextModelWithProjection`.
  * 
  * ```javascript
- * import { AutoTokenizer, CLIPTextModelWithProjection } from '@xenova/transformers';
+ * import { AutoTokenizer, CLIPTextModelWithProjection } from '@huggingface/transformers';
  * 
  * // Load tokenizer and text model
  * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/clip-vit-base-patch16');
@@ -3164,7 +3493,7 @@ export class CLIPVisionModel extends CLIPPreTrainedModel {
  * **Example:** Compute vision embeddings with `CLIPVisionModelWithProjection`.
  * 
  * ```javascript
- * import { AutoProcessor, CLIPVisionModelWithProjection, RawImage} from '@xenova/transformers';
+ * import { AutoProcessor, CLIPVisionModelWithProjection, RawImage} from '@huggingface/transformers';
  * 
  * // Load processor and vision model
  * const processor = await AutoProcessor.from_pretrained('Xenova/clip-vit-base-patch16');
@@ -3205,7 +3534,7 @@ export class SiglipPreTrainedModel extends PreTrainedModel { }
  * **Example:** Perform zero-shot image classification with a `SiglipModel`.
  * 
  * ```javascript
- * import { AutoTokenizer, AutoProcessor, SiglipModel, RawImage } from '@xenova/transformers';
+ * import { AutoTokenizer, AutoProcessor, SiglipModel, RawImage } from '@huggingface/transformers';
  * 
  * // Load tokenizer, processor, and model
  * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/siglip-base-patch16-224');
@@ -3250,7 +3579,7 @@ export class SiglipModel extends SiglipPreTrainedModel { }
  * **Example:** Compute text embeddings with `SiglipTextModel`.
  * 
  * ```javascript
- * import { AutoTokenizer, SiglipTextModel } from '@xenova/transformers';
+ * import { AutoTokenizer, SiglipTextModel } from '@huggingface/transformers';
  * 
  * // Load tokenizer and text model
  * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/siglip-base-patch16-224');
@@ -3286,7 +3615,7 @@ export class SiglipTextModel extends SiglipPreTrainedModel {
  * **Example:** Compute vision embeddings with `SiglipVisionModel`.
  * 
  * ```javascript
- * import { AutoProcessor, SiglipVisionModel, RawImage} from '@xenova/transformers';
+ * import { AutoProcessor, SiglipVisionModel, RawImage} from '@huggingface/transformers';
  * 
  * // Load processor and vision model
  * const processor = await AutoProcessor.from_pretrained('Xenova/siglip-base-patch16-224');
@@ -3334,7 +3663,7 @@ export class CLIPSegModel extends CLIPSegPreTrainedModel { }
  * **Example:** Perform zero-shot image segmentation with a `CLIPSegForImageSegmentation` model.
  * 
  * ```javascript
- * import { AutoTokenizer, AutoProcessor, CLIPSegForImageSegmentation, RawImage } from '@xenova/transformers';
+ * import { AutoTokenizer, AutoProcessor, CLIPSegForImageSegmentation, RawImage } from '@huggingface/transformers';
  * 
  * // Load tokenizer, processor, and model
  * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/clipseg-rd64-refined');
@@ -3380,25 +3709,7 @@ export class CLIPSegForImageSegmentation extends CLIPSegPreTrainedModel { }
 
 //////////////////////////////////////////////////
 // GPT2 models
-export class GPT2PreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `GPT2PreTrainedModel` class.
-     * @param {Object} config The configuration of the model.
-     * @param {any} session The ONNX session containing the model weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.n_head
-        this.num_layers = this.config.n_layer
-        this.dim_kv = this.config.n_embd / this.num_heads;
-    }
-}
+export class GPT2PreTrainedModel extends PreTrainedModel { }
 
 export class GPT2Model extends GPT2PreTrainedModel { }
 
@@ -3412,26 +3723,24 @@ export class GPT2LMHeadModel extends GPT2PreTrainedModel { }
 //////////////////////////////////////////////////
 
 //////////////////////////////////////////////////
-// GPTNeo models
-export class GPTNeoPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `GPTNeoPreTrainedModel` class.
-     * @param {Object} config The configuration of the model.
-     * @param {any} session The ONNX session containing the model weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
+// JAIS models
+export class JAISPreTrainedModel extends PreTrainedModel { }
 
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
+/**
+ * The bare JAIS Model transformer outputting raw hidden-states without any specific head on top.
+ */
+export class JAISModel extends JAISPreTrainedModel { }
 
-        this.num_heads = this.config.num_heads;
-        this.num_layers = this.config.num_layers;
-        this.dim_kv = this.config.hidden_size / this.num_heads;
-    }
-}
+/**
+ * The JAIS Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
+ */
+export class JAISLMHeadModel extends JAISPreTrainedModel { }
+//////////////////////////////////////////////////
+
+
+//////////////////////////////////////////////////
+// GPTNeo models
+export class GPTNeoPreTrainedModel extends PreTrainedModel { }
 export class GPTNeoModel extends GPTNeoPreTrainedModel { }
 
 export class GPTNeoForCausalLM extends GPTNeoPreTrainedModel { }
@@ -3439,25 +3748,7 @@ export class GPTNeoForCausalLM extends GPTNeoPreTrainedModel { }
 
 //////////////////////////////////////////////////
 // GPTNeoX models
-export class GPTNeoXPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `GPTNeoXPreTrainedModel` class.
-     * @param {Object} config The configuration of the model.
-     * @param {any} session The ONNX session containing the model weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.num_attention_heads;
-        this.num_layers = this.config.num_hidden_layers;
-        this.dim_kv = this.config.hidden_size / this.num_heads;
-    }
-}
+export class GPTNeoXPreTrainedModel extends PreTrainedModel { }
 export class GPTNeoXModel extends GPTNeoXPreTrainedModel { }
 
 export class GPTNeoXForCausalLM extends GPTNeoXPreTrainedModel { }
@@ -3466,25 +3757,7 @@ export class GPTNeoXForCausalLM extends GPTNeoXPreTrainedModel { }
 
 //////////////////////////////////////////////////
 // GPT-J models
-export class GPTJPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `GPTJPreTrainedModel` class.
-     * @param {Object} config The configuration of the model.
-     * @param {any} session The ONNX session containing the model weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.n_head
-        this.num_layers = this.config.n_layer
-        this.dim_kv = this.config.n_embd / this.num_heads;
-    }
-}
+export class GPTJPreTrainedModel extends PreTrainedModel { }
 
 export class GPTJModel extends GPTJPreTrainedModel { }
 
@@ -3494,25 +3767,7 @@ export class GPTJForCausalLM extends GPTJPreTrainedModel { }
 
 //////////////////////////////////////////////////
 // GPTBigCode models
-export class GPTBigCodePreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `GPTBigCodePreTrainedModel` class.
-     * @param {Object} config The configuration of the model.
-     * @param {any} session The ONNX session containing the model weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.n_head
-        this.num_layers = this.config.n_layer
-        this.dim_kv = this.config.n_embd / this.num_heads;
-    }
-}
+export class GPTBigCodePreTrainedModel extends PreTrainedModel { }
 
 export class GPTBigCodeModel extends GPTBigCodePreTrainedModel { }
 
@@ -3521,25 +3776,7 @@ export class GPTBigCodeForCausalLM extends GPTBigCodePreTrainedModel { }
 
 //////////////////////////////////////////////////
 // CodeGen models
-export class CodeGenPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `CodeGenPreTrainedModel` class.
-     * @param {Object} config The model configuration object.
-     * @param {Object} session The ONNX session object.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.n_head
-        this.num_layers = this.config.n_layer
-        this.dim_kv = this.config.n_embd / this.num_heads;
-    }
-}
+export class CodeGenPreTrainedModel extends PreTrainedModel { }
 /**
  * CodeGenModel is a class representing a code generation model without a language model head.
  */
@@ -3558,25 +3795,7 @@ export class CodeGenForCausalLM extends CodeGenPreTrainedModel { }
 /**
  * The bare LLama Model outputting raw hidden-states without any specific head on top.
  */
-export class LlamaPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `LlamaPreTrainedModel` class.
-     * @param {Object} config The model configuration object.
-     * @param {Object} session The ONNX session object.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.num_key_value_heads ?? this.config.num_attention_heads
-        this.num_layers = this.config.num_hidden_layers
-        this.dim_kv = this.config.hidden_size / this.config.num_attention_heads
-    }
-}
+export class LlamaPreTrainedModel extends PreTrainedModel { }
 /**
  * The bare LLaMA Model outputting raw hidden-states without any specific head on top.
  */
@@ -3585,31 +3804,71 @@ export class LlamaModel extends LlamaPreTrainedModel { }
 export class LlamaForCausalLM extends LlamaPreTrainedModel { }
 //////////////////////////////////////////////////
 
+
 //////////////////////////////////////////////////
-// Qwen2 models
+// Granite models
+export class GranitePreTrainedModel extends PreTrainedModel { }
+export class GraniteModel extends GranitePreTrainedModel { }
+export class GraniteForCausalLM extends GranitePreTrainedModel { }
+//////////////////////////////////////////////////
+
+
+//////////////////////////////////////////////////
+// Cohere models
 
 /**
- * The bare Qwen2 Model outputting raw hidden-states without any specific head on top.
+ * The bare Cohere Model outputting raw hidden-states without any specific head on top.
  */
-export class Qwen2PreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `Qwen2PreTrainedModel` class.
-     * @param {Object} config The model configuration object.
-     * @param {Object} session The ONNX session object.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
+export class CoherePreTrainedModel extends PreTrainedModel { }
+export class CohereModel extends CoherePreTrainedModel { }
+
+export class CohereForCausalLM extends CoherePreTrainedModel { }
+//////////////////////////////////////////////////
 
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
+//////////////////////////////////////////////////
+// Gemma models
 
-        this.num_heads = this.config.num_key_value_heads ?? this.config.num_attention_heads
-        this.num_layers = this.config.num_hidden_layers
-        this.dim_kv = this.config.hidden_size / this.config.num_attention_heads
-    }
-}
+/**
+ * The bare Gemma Model outputting raw hidden-states without any specific head on top.
+ */
+export class GemmaPreTrainedModel extends PreTrainedModel { }
+/**
+ * The bare Gemma Model outputting raw hidden-states without any specific head on top.
+ */
+export class GemmaModel extends GemmaPreTrainedModel { }
+
+export class GemmaForCausalLM extends GemmaPreTrainedModel { }
+//////////////////////////////////////////////////
+
+//////////////////////////////////////////////////
+// Gemma2 models
+
+/**
+ * The bare Gemma2 Model outputting raw hidden-states without any specific head on top.
+ */
+export class Gemma2PreTrainedModel extends PreTrainedModel { }
+/**
+ * The bare Gemma2 Model outputting raw hidden-states without any specific head on top.
+ */
+export class Gemma2Model extends Gemma2PreTrainedModel { }
+
+export class Gemma2ForCausalLM extends Gemma2PreTrainedModel { }
+//////////////////////////////////////////////////
+
+//////////////////////////////////////////////////
+export class OpenELMPreTrainedModel extends PreTrainedModel { }
+export class OpenELMModel extends OpenELMPreTrainedModel { }
+
+export class OpenELMForCausalLM extends OpenELMPreTrainedModel { }
+
+
+//////////////////////////////////////////////////
+// Qwen2 models
+
+/**
+ * The bare Qwen2 Model outputting raw hidden-states without any specific head on top.
+ */
+export class Qwen2PreTrainedModel extends PreTrainedModel { }
 /**
  * The bare Qwen2 Model outputting raw hidden-states without any specific head on top.
  */
@@ -3621,26 +3880,7 @@ export class Qwen2ForCausalLM extends Qwen2PreTrainedModel { }
 
 //////////////////////////////////////////////////
 // Phi models
-
-export class PhiPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `PhiPreTrainedModel` class.
-     * @param {Object} config The model configuration object.
-     * @param {Object} session The ONNX session object.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id;
-
-        this.num_heads = this.config.num_attention_heads;
-        this.num_layers = this.config.num_hidden_layers;
-        this.dim_kv = this.config.hidden_size / this.num_heads;
-    }
-}
+export class PhiPreTrainedModel extends PreTrainedModel { }
 /**
  * The bare Phi Model outputting raw hidden-states without any specific head on top.
  */
@@ -3649,31 +3889,25 @@ export class PhiModel extends PhiPreTrainedModel { }
 export class PhiForCausalLM extends PhiPreTrainedModel { }
 //////////////////////////////////////////////////
 
+//////////////////////////////////////////////////
+// Phi3 models
+export class Phi3PreTrainedModel extends PreTrainedModel { }
+
+/**
+ * The bare Phi3 Model outputting raw hidden-states without any specific head on top.
+ */
+export class Phi3Model extends Phi3PreTrainedModel { }
+
+export class Phi3ForCausalLM extends Phi3PreTrainedModel { }
+//////////////////////////////////////////////////
+
 
 //////////////////////////////////////////////////
 // Bloom models
 /**
  * The Bloom Model transformer with a language modeling head on top (linear layer with weights tied to the input embeddings).
  */
-export class BloomPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `BloomPreTrainedModel` class.
-     * @param {Object} config The configuration of the model.
-     * @param {any} session The ONNX session containing the model weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.n_head
-        this.num_layers = this.config.n_layer
-        this.dim_kv = this.config.hidden_size / this.num_heads;
-    }
-}
+export class BloomPreTrainedModel extends PreTrainedModel { }
 
 /**
  * The bare Bloom Model transformer outputting raw hidden-states without any specific head on top.
@@ -3688,25 +3922,7 @@ export class BloomForCausalLM extends BloomPreTrainedModel { }
 
 //////////////////////////////////////////////////
 // MPT models
-export class MptPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `MptPreTrainedModel` class.
-     * @param {Object} config The model configuration object.
-     * @param {Object} session The ONNX session object.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.n_heads
-        this.num_layers = this.config.n_layers
-        this.dim_kv = this.config.d_model / this.num_heads;
-    }
-}
+export class MptPreTrainedModel extends PreTrainedModel { }
 
 /**
  * The bare Mpt Model transformer outputting raw hidden-states without any specific head on top.
@@ -3722,25 +3938,7 @@ export class MptForCausalLM extends MptPreTrainedModel { }
 
 //////////////////////////////////////////////////
 // OPT models
-export class OPTPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `OPTPreTrainedModel` class.
-     * @param {Object} config The model configuration object.
-     * @param {Object} session The ONNX session object.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.num_attention_heads;
-        this.num_layers = this.config.num_hidden_layers;
-        this.dim_kv = this.config.hidden_size / this.num_heads;
-    }
-}
+export class OPTPreTrainedModel extends PreTrainedModel { }
 
 /**
  * The bare OPT Model outputting raw hidden-states without any specific head on top.
@@ -3766,6 +3964,43 @@ export class ViTForImageClassification extends ViTPreTrainedModel {
 }
 //////////////////////////////////////////////////
 
+//////////////////////////////////////////////////
+export class PvtPreTrainedModel extends PreTrainedModel { }
+export class PvtModel extends PvtPreTrainedModel { }
+export class PvtForImageClassification extends PvtPreTrainedModel {
+    /**
+     * @param {any} model_inputs
+     */
+    async _call(model_inputs) {
+        return new SequenceClassifierOutput(await super._call(model_inputs));
+    }
+}
+//////////////////////////////////////////////////
+
+//////////////////////////////////////////////////
+export class ViTMAEPreTrainedModel extends PreTrainedModel { }
+export class ViTMAEModel extends ViTMAEPreTrainedModel { }
+//////////////////////////////////////////////////
+
+
+//////////////////////////////////////////////////
+export class ViTMSNPreTrainedModel extends PreTrainedModel { }
+export class ViTMSNModel extends ViTMSNPreTrainedModel { }
+export class ViTMSNForImageClassification extends ViTMSNPreTrainedModel {
+    /**
+     * @param {any} model_inputs
+     */
+    async _call(model_inputs) {
+        return new SequenceClassifierOutput(await super._call(model_inputs));
+    }
+}
+//////////////////////////////////////////////////
+
+//////////////////////////////////////////////////
+export class GroupViTPreTrainedModel extends PreTrainedModel { }
+export class GroupViTModel extends GroupViTPreTrainedModel { }
+//////////////////////////////////////////////////
+
 
 //////////////////////////////////////////////////
 export class FastViTPreTrainedModel extends PreTrainedModel { }
@@ -3788,7 +4023,7 @@ export class VitMattePreTrainedModel extends PreTrainedModel { }
  * 
  * **Example:** Perform image matting with a `VitMatteForImageMatting` model.
  * ```javascript
- * import { AutoProcessor, VitMatteForImageMatting, RawImage } from '@xenova/transformers';
+ * import { AutoProcessor, VitMatteForImageMatting, RawImage } from '@huggingface/transformers';
  * 
  * // Load processor and model
  * const processor = await AutoProcessor.from_pretrained('Xenova/vitmatte-small-distinctions-646');
@@ -3813,7 +4048,7 @@ export class VitMattePreTrainedModel extends PreTrainedModel { }
  * 
  * You can visualize the alpha matte as follows:
  * ```javascript
- * import { Tensor, cat } from '@xenova/transformers';
+ * import { Tensor, cat } from '@huggingface/transformers';
  * 
  * // Visualize predicted alpha matte
  * const imageTensor = image.toTensor();
@@ -3954,6 +4189,33 @@ export class DetrSegmentationOutput extends ModelOutput {
 }
 //////////////////////////////////////////////////
 
+//////////////////////////////////////////////////
+export class RTDetrPreTrainedModel extends PreTrainedModel { }
+export class RTDetrModel extends RTDetrPreTrainedModel { }
+export class RTDetrForObjectDetection extends RTDetrPreTrainedModel {
+    /**
+     * @param {any} model_inputs
+     */
+    async _call(model_inputs) {
+        return new RTDetrObjectDetectionOutput(await super._call(model_inputs));
+    }
+}
+
+export class RTDetrObjectDetectionOutput extends ModelOutput {
+    /**
+     * @param {Object} output The output of the model.
+     * @param {Tensor} output.logits Classification logits (including no-object) for all queries.
+     * @param {Tensor} output.pred_boxes Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height).
+     * These values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding possible padding).
+     */
+    constructor({ logits, pred_boxes }) {
+        super();
+        this.logits = logits;
+        this.pred_boxes = pred_boxes;
+    }
+}
+//////////////////////////////////////////////////
+
 //////////////////////////////////////////////////
 export class TableTransformerPreTrainedModel extends PreTrainedModel { }
 
@@ -3992,6 +4254,19 @@ export class DeiTForImageClassification extends DeiTPreTrainedModel {
 }
 //////////////////////////////////////////////////
 
+//////////////////////////////////////////////////
+export class HieraPreTrainedModel extends PreTrainedModel { }
+export class HieraModel extends HieraPreTrainedModel { }
+export class HieraForImageClassification extends HieraPreTrainedModel {
+    /**
+     * @param {any} model_inputs
+     */
+    async _call(model_inputs) {
+        return new SequenceClassifierOutput(await super._call(model_inputs));
+    }
+}
+//////////////////////////////////////////////////
+
 
 //////////////////////////////////////////////////
 /**
@@ -4045,7 +4320,7 @@ export class Swin2SRModel extends Swin2SRPreTrainedModel { }
  * **Example:** Super-resolution w/ `Xenova/swin2SR-classical-sr-x2-64`.
  * 
  * ```javascript
- * import { AutoProcessor, Swin2SRForImageSuperResolution, RawImage } from '@xenova/transformers';
+ * import { AutoProcessor, Swin2SRForImageSuperResolution, RawImage } from '@huggingface/transformers';
  * 
  * // Load processor and model
  * const model_id = 'Xenova/swin2SR-classical-sr-x2-64';
@@ -4087,7 +4362,7 @@ export class DPTModel extends DPTPreTrainedModel { }
  * 
  * **Example:** Depth estimation w/ `Xenova/dpt-hybrid-midas`.
  * ```javascript
- * import { DPTForDepthEstimation, AutoProcessor, RawImage, interpolate, max } from '@xenova/transformers';
+ * import { DPTForDepthEstimation, AutoProcessor, RawImage, interpolate, max } from '@huggingface/transformers';
  * 
  * // Load model and processor
  * const model_id = 'Xenova/dpt-hybrid-midas';
@@ -4131,6 +4406,24 @@ export class DepthAnythingForDepthEstimation extends DepthAnythingPreTrainedMode
 //////////////////////////////////////////////////
 
 
+//////////////////////////////////////////////////
+export class SapiensPreTrainedModel extends PreTrainedModel { }
+export class SapiensForSemanticSegmentation extends SapiensPreTrainedModel { }
+export class SapiensForDepthEstimation extends SapiensPreTrainedModel { }
+export class SapiensForNormalEstimation extends SapiensPreTrainedModel { }
+//////////////////////////////////////////////////
+
+//////////////////////////////////////////////////
+export class DepthProPreTrainedModel extends PreTrainedModel { }
+export class DepthProForDepthEstimation extends DepthProPreTrainedModel { }
+//////////////////////////////////////////////////
+
+//////////////////////////////////////////////////
+export class MaskFormerPreTrainedModel extends PreTrainedModel { }
+export class MaskFormerModel extends MaskFormerPreTrainedModel { }
+export class MaskFormerForInstanceSegmentation extends MaskFormerPreTrainedModel { }
+//////////////////////////////////////////////////
+
 //////////////////////////////////////////////////
 export class GLPNPreTrainedModel extends PreTrainedModel { }
 
@@ -4144,7 +4437,7 @@ export class GLPNModel extends GLPNPreTrainedModel { }
  * 
  * **Example:** Depth estimation w/ `Xenova/glpn-kitti`.
  * ```javascript
- * import { GLPNForDepthEstimation, AutoProcessor, RawImage, interpolate, max } from '@xenova/transformers';
+ * import { GLPNForDepthEstimation, AutoProcessor, RawImage, interpolate, max } from '@huggingface/transformers';
  * 
  * // Load model and processor
  * const model_id = 'Xenova/glpn-kitti';
@@ -4187,7 +4480,7 @@ export class DonutSwinPreTrainedModel extends PreTrainedModel { }
  * **Example:** Step-by-step Document Parsing.
  * 
  * ```javascript
- * import { AutoProcessor, AutoTokenizer, AutoModelForVision2Seq, RawImage } from '@xenova/transformers';
+ * import { AutoProcessor, AutoTokenizer, AutoModelForVision2Seq, RawImage } from '@huggingface/transformers';
  * 
  * // Choose model to use
  * const model_id = 'Xenova/donut-base-finetuned-cord-v2';
@@ -4222,7 +4515,7 @@ export class DonutSwinPreTrainedModel extends PreTrainedModel { }
  * **Example:** Step-by-step Document Visual Question Answering (DocVQA)
  * 
  * ```javascript
- * import { AutoProcessor, AutoTokenizer, AutoModelForVision2Seq, RawImage } from '@xenova/transformers';
+ * import { AutoProcessor, AutoTokenizer, AutoModelForVision2Seq, RawImage } from '@huggingface/transformers';
  * 
  * // Choose model to use
  * const model_id = 'Xenova/donut-base-finetuned-docvqa';
@@ -4352,6 +4645,8 @@ export class YolosObjectDetectionOutput extends ModelOutput {
 //////////////////////////////////////////////////
 
 
+
+
 //////////////////////////////////////////////////
 export class SamPreTrainedModel extends PreTrainedModel { }
 
@@ -4361,7 +4656,7 @@ export class SamPreTrainedModel extends PreTrainedModel { }
  * 
  * **Example:** Perform mask generation w/ `Xenova/sam-vit-base`.
  * ```javascript
- * import { SamModel, AutoProcessor, RawImage } from '@xenova/transformers';
+ * import { SamModel, AutoProcessor, RawImage } from '@huggingface/transformers';
  * 
  * const model = await SamModel.from_pretrained('Xenova/sam-vit-base');
  * const processor = await AutoProcessor.from_pretrained('Xenova/sam-vit-base');
@@ -4370,7 +4665,7 @@ export class SamPreTrainedModel extends PreTrainedModel { }
  * const raw_image = await RawImage.read(img_url);
  * const input_points = [[[450, 600]]] // 2D localization of a window
  * 
- * const inputs = await processor(raw_image, input_points);
+ * const inputs = await processor(raw_image, { input_points });
  * const outputs = await model(inputs);
  * 
  * const masks = await processor.post_process_masks(outputs.pred_masks, inputs.original_sizes, inputs.reshaped_input_sizes);
@@ -4396,16 +4691,6 @@ export class SamPreTrainedModel extends PreTrainedModel { }
  * ```
  */
 export class SamModel extends SamPreTrainedModel {
-    /**
-     * Creates a new instance of the `SamModel` class.
-     * @param {Object} config The configuration object specifying the hyperparameters and other model settings.
-     * @param {Object} vision_encoder The ONNX session containing the vision encoder model.
-     * @param {any} prompt_encoder_mask_decoder The ONNX session containing the prompt encoder and mask decoder model.
-     */
-    constructor(config, vision_encoder, prompt_encoder_mask_decoder) {
-        super(config, vision_encoder);
-        this.prompt_encoder_mask_decoder = prompt_encoder_mask_decoder;
-    }
 
     /**
      * Compute image embeddings and positional image embeddings, given the pixel values of an image.
@@ -4427,7 +4712,7 @@ export class SamModel extends SamPreTrainedModel {
      * @typedef {Object} SamModelInputs Object containing the model inputs.
      * @property {Tensor} pixel_values Pixel values as a Tensor with shape `(batch_size, num_channels, height, width)`.
      * These can be obtained using a `SamProcessor`.
-     * @property {Tensor} input_points Input 2D spatial points with shape `(batch_size, num_points, 2)`.
+     * @property {Tensor} [input_points] Input 2D spatial points with shape `(batch_size, num_points, 2)`.
      * This is used by the prompt encoder to encode the prompt.
      * @property {Tensor} [input_labels] Input labels for the points, as a Tensor of shape `(batch_size, point_batch_size, num_points)`.
      * This is used by the prompt encoder to encode the prompt. There are 4 types of labels:
@@ -4435,6 +4720,7 @@ export class SamModel extends SamPreTrainedModel {
      *  - `0`: the point is a point that does not contain the object of interest
      *  - `-1`: the point corresponds to the background
      *  - `-10`: the point is a padding point, thus should be ignored by the prompt encoder
+     * @property {Tensor} [input_boxes] Input bounding boxes with shape `(batch_size, num_boxes, 4)`.
      * @property {Tensor} [image_embeddings] Image embeddings used by the mask decoder.
      * @property {Tensor} [image_positional_embeddings] Image positional embeddings used by the mask decoder.
      */
@@ -4452,7 +4738,7 @@ export class SamModel extends SamPreTrainedModel {
             }
         }
 
-        if (!model_inputs.input_labels) {
+        if (!model_inputs.input_labels && model_inputs.input_points) {
             // Set default input labels if they are missing
             const shape = model_inputs.input_points.dims.slice(0, -1);
             const numElements = shape.reduce((a, b) => a * b, 1);
@@ -4463,15 +4749,24 @@ export class SamModel extends SamPreTrainedModel {
             );
         }
 
+        const decoder_inputs = {
+            image_embeddings: model_inputs.image_embeddings,
+            image_positional_embeddings: model_inputs.image_positional_embeddings,
+        };
+        if (model_inputs.input_points) {
+            decoder_inputs.input_points = model_inputs.input_points;
+        }
+        if (model_inputs.input_labels) {
+            decoder_inputs.input_labels = model_inputs.input_labels;
+        }
+        if (model_inputs.input_boxes) {
+            decoder_inputs.input_boxes = model_inputs.input_boxes;
+        }
+
         // Returns:
         //  - iou_scores: tensor.float32[batch_size,point_batch_size,3]
         //  - pred_masks: tensor.float32[batch_size,point_batch_size,3,256,256]
-        return await sessionRun(this.prompt_encoder_mask_decoder, {
-            input_points: model_inputs.input_points,
-            input_labels: model_inputs.input_labels,
-            image_embeddings: model_inputs.image_embeddings,
-            image_positional_embeddings: model_inputs.image_positional_embeddings,
-        });
+        return await sessionRun(this.sessions['prompt_encoder_mask_decoder'], decoder_inputs);
     }
 
     /**
@@ -4509,29 +4804,7 @@ export class MarianPreTrainedModel extends PreTrainedModel { };
 
 export class MarianModel extends MarianPreTrainedModel { }
 
-export class MarianMTModel extends MarianPreTrainedModel {
-
-    /**
-     * Creates a new instance of the `MarianMTModel` class.
-    * @param {Object} config The model configuration object.
-    * @param {Object} session The ONNX session object.
-    * @param {any} decoder_merged_session 
-    * @param {any} generation_config 
-    */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
-
-        this.num_decoder_layers = this.config.decoder_layers;
-        this.num_decoder_heads = this.config.decoder_attention_heads;
-        this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
-
-        this.num_encoder_layers = this.config.encoder_layers;
-        this.num_encoder_heads = this.config.encoder_attention_heads;
-        this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
-    }
-}
+export class MarianMTModel extends MarianPreTrainedModel { }
 //////////////////////////////////////////////////
 
 //////////////////////////////////////////////////
@@ -4540,30 +4813,7 @@ export class M2M100PreTrainedModel extends PreTrainedModel { };
 
 export class M2M100Model extends M2M100PreTrainedModel { }
 
-export class M2M100ForConditionalGeneration extends M2M100PreTrainedModel {
-
-    /**
-     * Creates a new instance of the `M2M100ForConditionalGeneration` class.
-    * @param {Object} config The model configuration object.
-    * @param {Object} session The ONNX session object.
-    * @param {any} decoder_merged_session 
-    * @param {any} generation_config 
-    */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
-
-        this.num_decoder_layers = this.config.decoder_layers;
-        this.num_decoder_heads = this.config.decoder_attention_heads;
-        this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
-
-        this.num_encoder_layers = this.config.encoder_layers;
-        this.num_encoder_heads = this.config.encoder_attention_heads;
-        this.encoder_dim_kv = this.config.d_model / this.num_encoder_heads;
-    }
-
-}
+export class M2M100ForConditionalGeneration extends M2M100PreTrainedModel { }
 //////////////////////////////////////////////////
 
 //////////////////////////////////////////////////
@@ -4576,7 +4826,7 @@ export class Wav2Vec2PreTrainedModel extends PreTrainedModel { };
  * **Example:** Load and run a `Wav2Vec2Model` for feature extraction.
  * 
  * ```javascript
- * import { AutoProcessor, AutoModel, read_audio } from '@xenova/transformers';
+ * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
  * 
  * // Read and preprocess audio
  * const processor = await AutoProcessor.from_pretrained('Xenova/mms-300m');
@@ -4635,6 +4885,92 @@ export class Wav2Vec2ForAudioFrameClassification extends Wav2Vec2PreTrainedModel
 }
 //////////////////////////////////////////////////
 
+
+//////////////////////////////////////////////////
+// PyAnnote models
+export class PyAnnotePreTrainedModel extends PreTrainedModel { };
+
+/**
+ * The bare PyAnnote Model transformer outputting raw hidden-states without any specific head on top.
+ */
+export class PyAnnoteModel extends PyAnnotePreTrainedModel { }
+
+/**
+ * PyAnnote Model with a frame classification head on top for tasks like Speaker Diarization.
+ * 
+ * **Example:** Load and run a `PyAnnoteForAudioFrameClassification` for speaker diarization.
+ * 
+ * ```javascript
+ * import { AutoProcessor, AutoModelForAudioFrameClassification, read_audio } from '@huggingface/transformers';
+ * 
+ * // Load model and processor
+ * const model_id = 'onnx-community/pyannote-segmentation-3.0';
+ * const model = await AutoModelForAudioFrameClassification.from_pretrained(model_id);
+ * const processor = await AutoProcessor.from_pretrained(model_id);
+ * 
+ * // Read and preprocess audio
+ * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/mlk.wav';
+ * const audio = await read_audio(url, processor.feature_extractor.config.sampling_rate);
+ * const inputs = await processor(audio);
+ * 
+ * // Run model with inputs
+ * const { logits } = await model(inputs);
+ * // {
+ * //   logits: Tensor {
+ * //     dims: [ 1, 767, 7 ],  // [batch_size, num_frames, num_classes]
+ * //     type: 'float32',
+ * //     data: Float32Array(5369) [ ... ],
+ * //     size: 5369
+ * //   }
+ * // }
+ * 
+ * const result = processor.post_process_speaker_diarization(logits, audio.length);
+ * // [
+ * //   [
+ * //     { id: 0, start: 0, end: 1.0512535626298245, confidence: 0.8220156481664611 },
+ * //     { id: 2, start: 1.0512535626298245, end: 2.3398869619825127, confidence: 0.9008811707860472 },
+ * //     ...
+ * //   ]
+ * // ]
+ * 
+ * // Display result
+ * console.table(result[0], ['start', 'end', 'id', 'confidence']);
+ * // ┌─────────┬────────────────────┬────────────────────┬────┬─────────────────────┐
+ * // │ (index) │ start              │ end                │ id │ confidence          │
+ * // ├─────────┼────────────────────┼────────────────────┼────┼─────────────────────┤
+ * // │ 0       │ 0                  │ 1.0512535626298245 │ 0  │ 0.8220156481664611  │
+ * // │ 1       │ 1.0512535626298245 │ 2.3398869619825127 │ 2  │ 0.9008811707860472  │
+ * // │ 2       │ 2.3398869619825127 │ 3.5946089560890773 │ 0  │ 0.7521651315796233  │
+ * // │ 3       │ 3.5946089560890773 │ 4.578039708226655  │ 2  │ 0.8491978128022479  │
+ * // │ 4       │ 4.578039708226655  │ 4.594995410849717  │ 0  │ 0.2935352600416393  │
+ * // │ 5       │ 4.594995410849717  │ 6.121008646925269  │ 3  │ 0.6788051309866024  │
+ * // │ 6       │ 6.121008646925269  │ 6.256654267909762  │ 0  │ 0.37125512393851134 │
+ * // │ 7       │ 6.256654267909762  │ 8.630452635138397  │ 2  │ 0.7467035186353542  │
+ * // │ 8       │ 8.630452635138397  │ 10.088643060721703 │ 0  │ 0.7689364814666032  │
+ * // │ 9       │ 10.088643060721703 │ 12.58113134631177  │ 2  │ 0.9123324509131324  │
+ * // │ 10      │ 12.58113134631177  │ 13.005023911888312 │ 0  │ 0.4828358177572041  │
+ * // └─────────┴────────────────────┴────────────────────┴────┴─────────────────────┘
+ * ```
+ */
+export class PyAnnoteForAudioFrameClassification extends PyAnnotePreTrainedModel {
+    /**
+     * Calls the model on new inputs.
+     * @param {Object} model_inputs The inputs to the model.
+     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for sequence classification.
+     */
+    async _call(model_inputs) {
+        return new TokenClassifierOutput(await super._call(model_inputs));
+    }
+}
+//////////////////////////////////////////////////
+
+//////////////////////////////////////////////////
+// WeSpeakerResNet models
+export class WeSpeakerResNetPreTrainedModel extends PreTrainedModel { };
+export class WeSpeakerResNetModel extends WeSpeakerResNetPreTrainedModel { }
+//////////////////////////////////////////////////
+
+
 //////////////////////////////////////////////////
 // UniSpeech models
 export class UniSpeechPreTrainedModel extends PreTrainedModel { };
@@ -4773,7 +5109,7 @@ export class HubertPreTrainedModel extends PreTrainedModel { }
  * **Example:** Load and run a `HubertModel` for feature extraction.
  * 
  * ```javascript
- * import { AutoProcessor, AutoModel, read_audio } from '@xenova/transformers';
+ * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
  * 
  * // Read and preprocess audio
  * const processor = await AutoProcessor.from_pretrained('Xenova/hubert-base-ls960');
@@ -4837,7 +5173,7 @@ export class WavLMPreTrainedModel extends PreTrainedModel { };
  * **Example:** Load and run a `WavLMModel` for feature extraction.
  * 
  * ```javascript
- * import { AutoProcessor, AutoModel, read_audio } from '@xenova/transformers';
+ * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
  * 
  * // Read and preprocess audio
  * const processor = await AutoProcessor.from_pretrained('Xenova/wavlm-base');
@@ -4892,7 +5228,7 @@ export class WavLMForSequenceClassification extends WavLMPreTrainedModel {
  * 
  * **Example:** Extract speaker embeddings with `WavLMForXVector`.
  * ```javascript
- * import { AutoProcessor, AutoModel, read_audio } from '@xenova/transformers';
+ * import { AutoProcessor, AutoModel, read_audio } from '@huggingface/transformers';
  * 
  * // Read and preprocess audio
  * const processor = await AutoProcessor.from_pretrained('Xenova/wavlm-base-plus-sv');
@@ -4935,7 +5271,7 @@ export class WavLMForXVector extends WavLMPreTrainedModel {
  * 
  * **Example:** Perform speaker diarization with `WavLMForAudioFrameClassification`.
  * ```javascript
- * import { AutoProcessor, AutoModelForAudioFrameClassification, read_audio } from '@xenova/transformers';
+ * import { AutoProcessor, AutoModelForAudioFrameClassification, read_audio } from '@huggingface/transformers';
  * 
  * // Read and preprocess audio
  * const processor = await AutoProcessor.from_pretrained('Xenova/wavlm-base-plus-sd');
@@ -4995,16 +5331,16 @@ export class SpeechT5Model extends SpeechT5PreTrainedModel { };
  * 
  * **Example:** Generate speech from text with `SpeechT5ForSpeechToText`.
  * ```javascript
- * import { AutoTokenizer, AutoProcessor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, Tensor } from '@xenova/transformers';
+ * import { AutoTokenizer, AutoProcessor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, Tensor } from '@huggingface/transformers';
  * 
  * // Load the tokenizer and processor
  * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/speecht5_tts');
  * const processor = await AutoProcessor.from_pretrained('Xenova/speecht5_tts');
  * 
  * // Load the models
- * // NOTE: We use the unquantized versions as they are more accurate
- * const model = await SpeechT5ForTextToSpeech.from_pretrained('Xenova/speecht5_tts', { quantized: false });
- * const vocoder = await SpeechT5HifiGan.from_pretrained('Xenova/speecht5_hifigan', { quantized: false });
+ * // NOTE: We use the full-precision versions as they are more accurate
+ * const model = await SpeechT5ForTextToSpeech.from_pretrained('Xenova/speecht5_tts', { dtype: 'fp32' });
+ * const vocoder = await SpeechT5HifiGan.from_pretrained('Xenova/speecht5_hifigan', { dtype: 'fp32' });
  * 
  * // Load speaker embeddings from URL
  * const speaker_embeddings_data = new Float32Array(
@@ -5037,27 +5373,6 @@ export class SpeechT5ForSpeechToText extends SpeechT5PreTrainedModel { }
  */
 export class SpeechT5ForTextToSpeech extends SpeechT5PreTrainedModel {
 
-    /**
-     * Creates a new instance of the `SpeechT5ForTextToSpeech` class.
-     * @param {Object} config The model configuration.
-     * @param {any} session session for the model.
-     * @param {any} decoder_merged_session session for the decoder.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, decoder_merged_session, generation_config) {
-        super(config, session);
-        this.decoder_merged_session = decoder_merged_session;
-        this.generation_config = generation_config;
-
-        this.num_decoder_layers = this.config.decoder_layers;
-        this.num_decoder_heads = this.config.decoder_attention_heads;
-        this.decoder_dim_kv = this.config.hidden_size / this.num_decoder_heads;
-
-        this.num_encoder_layers = this.config.encoder_layers;
-        this.num_encoder_heads = this.config.encoder_attention_heads;
-        this.encoder_dim_kv = this.config.hidden_size / this.num_encoder_heads;
-    }
-
     /**
      * @typedef {Object} SpeechOutput
      * @property {Tensor} [spectrogram] The predicted log-mel spectrogram of shape
@@ -5127,7 +5442,7 @@ export class SpeechT5ForTextToSpeech extends SpeechT5PreTrainedModel {
             };
 
             this.addPastKeyValues(decoderFeeds, past_key_values);
-            decoder_outputs = await sessionRun(this.decoder_merged_session, decoderFeeds);
+            decoder_outputs = await sessionRun(this.sessions['decoder_model_merged'], decoderFeeds);
             past_key_values = this.getPastKeyValues(decoder_outputs, past_key_values);
 
             const { prob, spectrum } = decoder_outputs;
@@ -5142,7 +5457,7 @@ export class SpeechT5ForTextToSpeech extends SpeechT5PreTrainedModel {
         }
 
         const spectrogram = cat(spectrogramParts);
-        const { waveform } = await sessionRun(vocoder.session, { spectrogram });
+        const { waveform } = await sessionRun(vocoder.sessions['model'], { spectrogram });
 
         return {
             spectrogram,
@@ -5165,25 +5480,7 @@ export class SpeechT5HifiGan extends PreTrainedModel {
 
 //////////////////////////////////////////////////
 // TrOCR models
-export class TrOCRPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `TrOCRPreTrainedModel` class.
-     * @param {Object} config The configuration of the model.
-     * @param {any} session The ONNX session containing the model weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id;
-
-        this.num_encoder_layers = this.num_decoder_layers = this.config.decoder_layers;
-        this.num_encoder_heads = this.num_decoder_heads = this.config.decoder_attention_heads;
-        this.encoder_dim_kv = this.decoder_dim_kv = this.config.d_model / this.num_decoder_heads;
-    }
-}
+export class TrOCRPreTrainedModel extends PreTrainedModel { }
 
 /**
  * The TrOCR Decoder with a language modeling head.
@@ -5198,25 +5495,7 @@ export class TrOCRForCausalLM extends TrOCRPreTrainedModel { }
 /**
  * The bare Mistral Model outputting raw hidden-states without any specific head on top.
  */
-export class MistralPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `MistralPreTrainedModel` class.
-     * @param {Object} config The configuration of the model.
-     * @param {any} session The ONNX session containing the model weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.num_key_value_heads;
-        this.num_layers = this.config.num_hidden_layers;
-        this.dim_kv = this.config.hidden_size / this.config.num_attention_heads;
-    }
-}
+export class MistralPreTrainedModel extends PreTrainedModel { }
 
 export class MistralModel extends MistralPreTrainedModel { }
 
@@ -5229,25 +5508,7 @@ export class MistralForCausalLM extends MistralPreTrainedModel { }
 /**
  * The bare Starcoder2 Model outputting raw hidden-states without any specific head on top.
  */
-export class Starcoder2PreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `Starcoder2PreTrainedModel` class.
-     * @param {Object} config The configuration of the model.
-     * @param {any} session The ONNX session containing the model weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.num_key_value_heads;
-        this.num_layers = this.config.num_hidden_layers;
-        this.dim_kv = this.config.hidden_size / this.config.num_attention_heads;
-    }
-}
+export class Starcoder2PreTrainedModel extends PreTrainedModel { }
 
 export class Starcoder2Model extends Starcoder2PreTrainedModel { }
 
@@ -5260,25 +5521,7 @@ export class Starcoder2ForCausalLM extends Starcoder2PreTrainedModel { }
 /**
  * The bare Falcon Model outputting raw hidden-states without any specific head on top.
  */
-export class FalconPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `FalconPreTrainedModel` class.
-     * @param {Object} config The configuration of the model.
-     * @param {any} session The ONNX session containing the model weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.num_attention_heads;
-        this.num_layers = this.config.num_hidden_layers;
-        this.dim_kv = this.config.hidden_size / this.config.num_attention_heads;
-    }
-}
+export class FalconPreTrainedModel extends PreTrainedModel { }
 
 export class FalconModel extends FalconPreTrainedModel { }
 
@@ -5298,7 +5541,7 @@ export class ClapModel extends ClapPreTrainedModel { }
  * **Example:** Compute text embeddings with `ClapTextModelWithProjection`.
  * 
  * ```javascript
- * import { AutoTokenizer, ClapTextModelWithProjection } from '@xenova/transformers';
+ * import { AutoTokenizer, ClapTextModelWithProjection } from '@huggingface/transformers';
  * 
  * // Load tokenizer and text model
  * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/clap-htsat-unfused');
@@ -5334,7 +5577,7 @@ export class ClapTextModelWithProjection extends ClapPreTrainedModel {
  * **Example:** Compute audio embeddings with `ClapAudioModelWithProjection`.
  * 
  * ```javascript
- * import { AutoProcessor, ClapAudioModelWithProjection, read_audio } from '@xenova/transformers';
+ * import { AutoProcessor, ClapAudioModelWithProjection, read_audio } from '@huggingface/transformers';
  * 
  * // Load processor and audio model
  * const processor = await AutoProcessor.from_pretrained('Xenova/clap-htsat-unfused');
@@ -5374,7 +5617,7 @@ export class VitsPreTrainedModel extends PreTrainedModel { }
  * 
  * **Example:** Generate speech from text with `VitsModel`.
  * ```javascript
- * import { AutoTokenizer, VitsModel } from '@xenova/transformers';
+ * import { AutoTokenizer, VitsModel } from '@huggingface/transformers';
  * 
  * // Load the tokenizer and model
  * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/mms-tts-eng');
@@ -5428,25 +5671,7 @@ export class SegformerForSemanticSegmentation extends SegformerPreTrainedModel {
 
 //////////////////////////////////////////////////
 // StableLm models
-export class StableLmPreTrainedModel extends PreTrainedModel {
-    /**
-     * Creates a new instance of the `StableLmPreTrainedModel` class.
-     * @param {Object} config The configuration of the model.
-     * @param {any} session The ONNX session containing the model weights.
-     * @param {GenerationConfig} generation_config The generation configuration.
-     */
-    constructor(config, session, generation_config) {
-        super(config, session);
-        this.generation_config = generation_config;
-
-        // config doesn't contain pad_token_id, so we assume it is the eos_token_id
-        this.config.pad_token_id = this.config.eos_token_id
-
-        this.num_heads = this.config.num_attention_heads;
-        this.num_layers = this.config.num_hidden_layers;
-        this.dim_kv = this.config.hidden_size / this.num_heads;
-    }
-}
+export class StableLmPreTrainedModel extends PreTrainedModel { }
 
 /**
  * The bare StableLm Model transformer outputting raw hidden-states without any specific head on top.
@@ -5481,6 +5706,237 @@ export class EfficientNetForImageClassification extends EfficientNetPreTrainedMo
 }
 //////////////////////////////////////////////////
 
+//////////////////////////////////////////////////
+// Musicgen models
+export class MusicgenPreTrainedModel extends PreTrainedModel { }
+
+/**
+ * The bare Musicgen decoder model outputting raw hidden-states without any specific head on top.
+ */
+export class MusicgenModel extends MusicgenPreTrainedModel { }
+
+/**
+ * The MusicGen decoder model with a language modelling head on top.
+ */
+export class MusicgenForCausalLM extends MusicgenPreTrainedModel { }
+
+/**
+ * The composite MusicGen model with a text encoder, audio encoder and Musicgen decoder,
+ * for music generation tasks with one or both of text and audio prompts.
+ * 
+ * **Example:** Generate music from text with `Xenova/musicgen-small`.
+ * ```javascript
+ * import { AutoTokenizer, MusicgenForConditionalGeneration } from '@huggingface/transformers';
+ * 
+ * // Load tokenizer and model
+ * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/musicgen-small');
+ * const model = await MusicgenForConditionalGeneration.from_pretrained(
+ *   'Xenova/musicgen-small', { dtype: 'fp32' }
+ * );
+ * 
+ * // Prepare text input
+ * const prompt = '80s pop track with bassy drums and synth';
+ * const inputs = tokenizer(prompt);
+ * 
+ * // Generate audio
+ * const audio_values = await model.generate({
+ *   ...inputs,
+ *   max_new_tokens: 512,
+ *   do_sample: true,
+ *   guidance_scale: 3,
+ * });
+ * 
+ * // (Optional) Write the output to a WAV file
+ * import wavefile from 'wavefile';
+ * import fs from 'fs';
+ * 
+ * const wav = new wavefile.WaveFile();
+ * wav.fromScratch(1, model.config.audio_encoder.sampling_rate, '32f', audio_values.data);
+ * fs.writeFileSync('musicgen_out.wav', wav.toBuffer());
+ * ```
+ */
+export class MusicgenForConditionalGeneration extends PreTrainedModel { // NOTE: not MusicgenPreTrainedModel
+    forward_params = [
+        'input_ids',
+        'attention_mask',
+        'encoder_outputs',
+        'decoder_input_ids',
+        'decoder_attention_mask',
+        'past_key_values',
+    ];
+
+    /**
+     * Apply the pattern mask to the final ids,
+     * then revert the pattern delay mask by filtering the pad token id in a single step.
+     * @param {Tensor} outputs The output tensor from the model.
+     * @returns {Tensor} The filtered output tensor.
+     */
+    _apply_and_filter_by_delay_pattern_mask(outputs) {
+        const [bs_x_codebooks, seqLength] = outputs.dims;
+        const num_codebooks = this.config.decoder.num_codebooks;
+        const upperBound = (seqLength - num_codebooks);
+
+        let newDataSize = 0;
+        for (let i = 0; i < outputs.size; ++i) {
+            if (outputs.data[i] === this.config.decoder.pad_token_id) {
+                continue;
+            }
+
+            const row = (i % seqLength);
+            const col = Math.floor(i / seqLength) % num_codebooks;
+
+            const diff = row - col;
+            if (diff > 0 && diff <= upperBound) {
+                outputs.data[newDataSize++] = outputs.data[i];
+            }
+        }
+
+        const batch_size = Math.floor(bs_x_codebooks / num_codebooks);
+        const inferred = newDataSize / (batch_size * num_codebooks);
+        // TODO: assert `inferred` is an integer
+        return new Tensor(
+            outputs.type,
+            outputs.data.slice(0, newDataSize),
+            [batch_size, num_codebooks, inferred]
+        );
+    }
+
+
+    prepare_inputs_for_generation(input_ids, model_inputs, generation_config) {
+        // apply the delay pattern mask
+        let clonedInputIds = structuredClone(input_ids);
+        for (let i = 0; i < clonedInputIds.length; ++i) {
+            for (let j = 0; j < clonedInputIds[i].length; ++j) {
+                if ((i % this.config.decoder.num_codebooks) >= j) {
+                    clonedInputIds[i][j] = BigInt(this.config.decoder.pad_token_id);
+                }
+            }
+        }
+        // for classifier free guidance we need to replicate the decoder args across the batch dim
+        // (we'll split these before sampling)
+        if (generation_config.guidance_scale !== null && generation_config.guidance_scale > 1) {
+            // [batch, seqLength] -> [2 * batch, seqLength]
+            clonedInputIds = clonedInputIds.concat(clonedInputIds);
+        }
+
+        const prepped = super.prepare_inputs_for_generation(clonedInputIds, model_inputs, generation_config);
+        return prepped;
+    }
+
+    /**
+     * Generates sequences of token ids for models with a language modeling head.
+     * @param {import('./generation/parameters.js').GenerationFunctionParameters} options
+     * @returns {Promise<ModelOutput|Tensor>} The output of the model, which can contain the generated token ids, attentions, and scores.
+     */
+    async generate(options) {
+
+        const output_ids = await super.generate(options);
+
+        // apply the pattern mask to the final ids
+        // tensor: int64[1,batch_size,4,chunk_length]
+        const audio_codes = this._apply_and_filter_by_delay_pattern_mask(
+            /** @type {Tensor} */(output_ids)
+        ).unsqueeze_(0); // append the frame dimension back to the audio codes
+
+        const { audio_values } = await sessionRun(this.sessions['encodec_decode'], { audio_codes })
+
+        return audio_values;
+    }
+}
+//////////////////////////////////////////////////
+
+//////////////////////////////////////////////////
+// MobileNetV1 models
+export class MobileNetV1PreTrainedModel extends PreTrainedModel { }
+
+/**
+ * The bare MobileNetV1 model outputting raw hidden-states without any specific head on top.
+ */
+export class MobileNetV1Model extends MobileNetV1PreTrainedModel { }
+
+/**
+ * MobileNetV1 model with an image classification head on top (a linear layer on top of the pooled features),
+ * e.g. for ImageNet.
+ */
+export class MobileNetV1ForImageClassification extends MobileNetV1PreTrainedModel {
+    /**
+     * @param {any} model_inputs
+     */
+    async _call(model_inputs) {
+        return new SequenceClassifierOutput(await super._call(model_inputs));
+    }
+}
+//////////////////////////////////////////////////
+
+//////////////////////////////////////////////////
+// MobileNetV2 models
+export class MobileNetV2PreTrainedModel extends PreTrainedModel { }
+
+/**
+ * The bare MobileNetV2 model outputting raw hidden-states without any specific head on top.
+ */
+export class MobileNetV2Model extends MobileNetV2PreTrainedModel { }
+
+/**
+ * MobileNetV2 model with an image classification head on top (a linear layer on top of the pooled features),
+ * e.g. for ImageNet.
+ */
+export class MobileNetV2ForImageClassification extends MobileNetV2PreTrainedModel {
+    /**
+     * @param {any} model_inputs
+     */
+    async _call(model_inputs) {
+        return new SequenceClassifierOutput(await super._call(model_inputs));
+    }
+}
+//////////////////////////////////////////////////
+
+//////////////////////////////////////////////////
+// MobileNetV3 models
+export class MobileNetV3PreTrainedModel extends PreTrainedModel { }
+
+/**
+ * The bare MobileNetV3 model outputting raw hidden-states without any specific head on top.
+ */
+export class MobileNetV3Model extends MobileNetV3PreTrainedModel { }
+
+/**
+ * MobileNetV3 model with an image classification head on top (a linear layer on top of the pooled features),
+ * e.g. for ImageNet.
+ */
+export class MobileNetV3ForImageClassification extends MobileNetV3PreTrainedModel {
+    /**
+     * @param {any} model_inputs
+     */
+    async _call(model_inputs) {
+        return new SequenceClassifierOutput(await super._call(model_inputs));
+    }
+}
+//////////////////////////////////////////////////
+
+//////////////////////////////////////////////////
+// MobileNetV4 models
+export class MobileNetV4PreTrainedModel extends PreTrainedModel { }
+
+/**
+ * The bare MobileNetV4 model outputting raw hidden-states without any specific head on top.
+ */
+export class MobileNetV4Model extends MobileNetV4PreTrainedModel { }
+
+/**
+ * MobileNetV4 model with an image classification head on top (a linear layer on top of the pooled features),
+ * e.g. for ImageNet.
+ */
+export class MobileNetV4ForImageClassification extends MobileNetV4PreTrainedModel {
+    /**
+     * @param {any} model_inputs
+     */
+    async _call(model_inputs) {
+        return new SequenceClassifierOutput(await super._call(model_inputs));
+    }
+}
+//////////////////////////////////////////////////
+
 //////////////////////////////////////////////////
 // Decision Transformer models
 export class DecisionTransformerPreTrainedModel extends PreTrainedModel { }
@@ -5515,38 +5971,42 @@ export class PretrainedMixin {
     static BASE_IF_FAIL = false;
 
 
-    /** @type {PreTrainedModel.from_pretrained} */
+    /** @type {typeof PreTrainedModel.from_pretrained} */
     static async from_pretrained(pretrained_model_name_or_path, {
-        quantized = true,
         progress_callback = null,
         config = null,
         cache_dir = null,
         local_files_only = false,
         revision = 'main',
         model_file_name = null,
+        subfolder = 'onnx',
+        device = null,
+        dtype = null,
+        use_external_data_format = null,
+        session_options = {},
     } = {}) {
 
-        let options = {
-            quantized,
+        const options = {
             progress_callback,
             config,
             cache_dir,
             local_files_only,
             revision,
             model_file_name,
+            subfolder,
+            device,
+            dtype,
+            use_external_data_format,
+            session_options,
         }
-        config = await AutoConfig.from_pretrained(pretrained_model_name_or_path, options);
-        if (!options.config) {
-            // If no config was passed, reuse this config for future processing
-            options.config = config;
-        }
+        options.config = await AutoConfig.from_pretrained(pretrained_model_name_or_path, options);
 
         if (!this.MODEL_CLASS_MAPPINGS) {
             throw new Error("`MODEL_CLASS_MAPPINGS` not implemented for this type of `AutoClass`: " + this.name);
         }
 
-        for (let MODEL_CLASS_MAPPING of this.MODEL_CLASS_MAPPINGS) {
-            const modelInfo = MODEL_CLASS_MAPPING.get(config.model_type);
+        for (const MODEL_CLASS_MAPPING of this.MODEL_CLASS_MAPPINGS) {
+            const modelInfo = MODEL_CLASS_MAPPING.get(options.config.model_type);
             if (!modelInfo) {
                 continue; // Item not found in this mapping
             }
@@ -5554,10 +6014,10 @@ export class PretrainedMixin {
         }
 
         if (this.BASE_IF_FAIL) {
-            console.warn(`Unknown model class "${config.model_type}", attempting to construct from base class.`);
+            console.warn(`Unknown model class "${options.config.model_type}", attempting to construct from base class.`);
             return await PreTrainedModel.from_pretrained(pretrained_model_name_or_path, options);
         } else {
-            throw Error(`Unsupported model type: ${config.model_type}`)
+            throw Error(`Unsupported model type: ${options.config.model_type}`)
         }
     }
 }
@@ -5593,10 +6053,17 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
     ['wavlm', ['WavLMModel', WavLMModel]],
     ['audio-spectrogram-transformer', ['ASTModel', ASTModel]],
     ['vits', ['VitsModel', VitsModel]],
+    ['pyannote', ['PyAnnoteModel', PyAnnoteModel]],
+    ['wespeaker-resnet', ['WeSpeakerResNetModel', WeSpeakerResNetModel]],
 
     ['detr', ['DetrModel', DetrModel]],
+    ['rt_detr', ['RTDetrModel', RTDetrModel]],
     ['table-transformer', ['TableTransformerModel', TableTransformerModel]],
     ['vit', ['ViTModel', ViTModel]],
+    ['pvt', ['PvtModel', PvtModel]],
+    ['vit_msn', ['ViTMSNModel', ViTMSNModel]],
+    ['vit_mae', ['ViTMAEModel', ViTMAEModel]],
+    ['groupvit', ['GroupViTModel', GroupViTModel]],
     ['fastvit', ['FastViTModel', FastViTModel]],
     ['mobilevit', ['MobileViTModel', MobileViTModel]],
     ['mobilevitv2', ['MobileViTV2Model', MobileViTV2Model]],
@@ -5604,6 +6071,7 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
     ['owlv2', ['Owlv2Model', Owlv2Model]],
     ['beit', ['BeitModel', BeitModel]],
     ['deit', ['DeiTModel', DeiTModel]],
+    ['hiera', ['HieraModel', HieraModel]],
     ['convnext', ['ConvNextModel', ConvNextModel]],
     ['convnextv2', ['ConvNextV2Model', ConvNextV2Model]],
     ['dinov2', ['Dinov2Model', Dinov2Model]],
@@ -5619,6 +6087,13 @@ const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
     ['efficientnet', ['EfficientNetModel', EfficientNetModel]],
 
     ['decision_transformer', ['DecisionTransformerModel', DecisionTransformerModel]],
+
+    ['mobilenet_v1', ['MobileNetV1Model', MobileNetV1Model]],
+    ['mobilenet_v2', ['MobileNetV2Model', MobileNetV2Model]],
+    ['mobilenet_v3', ['MobileNetV3Model', MobileNetV3Model]],
+    ['mobilenet_v4', ['MobileNetV4Model', MobileNetV4Model]],
+
+    ['maskformer', ['MaskFormerModel', MaskFormerModel]],
 ]);
 
 const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
@@ -5637,6 +6112,7 @@ const MODEL_MAPPING_NAMES_ENCODER_DECODER = new Map([
 
 const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
     ['bloom', ['BloomModel', BloomModel]],
+    ['jais', ['JAISModel', JAISModel]],
     ['gpt2', ['GPT2Model', GPT2Model]],
     ['gptj', ['GPTJModel', GPTJModel]],
     ['gpt_bigcode', ['GPTBigCodeModel', GPTBigCodeModel]],
@@ -5644,13 +6120,20 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
     ['gpt_neox', ['GPTNeoXModel', GPTNeoXModel]],
     ['codegen', ['CodeGenModel', CodeGenModel]],
     ['llama', ['LlamaModel', LlamaModel]],
+    ['granite', ['GraniteModel', GraniteModel]],
+    ['cohere', ['CohereModel', CohereModel]],
+    ['gemma', ['GemmaModel', GemmaModel]],
+    ['gemma2', ['Gemma2Model', Gemma2Model]],
+    ['openelm', ['OpenELMModel', OpenELMModel]],
     ['qwen2', ['Qwen2Model', Qwen2Model]],
     ['phi', ['PhiModel', PhiModel]],
+    ['phi3', ['Phi3Model', Phi3Model]],
     ['mpt', ['MptModel', MptModel]],
     ['opt', ['OPTModel', OPTModel]],
     ['mistral', ['MistralModel', MistralModel]],
     ['starcoder2', ['Starcoder2Model', Starcoder2Model]],
     ['falcon', ['FalconModel', FalconModel]],
+    ['stablelm', ['StableLmModel', StableLmModel]],
 ]);
 
 const MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES = new Map([
@@ -5664,6 +6147,7 @@ const MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING_NAMES = new Map([
 
 const MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING_NAMES = new Map([
     ['vits', ['VitsModel', VitsModel]],
+    ['musicgen', ['MusicgenForConditionalGeneration', MusicgenForConditionalGeneration]],
 ]);
 
 const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
@@ -5715,17 +6199,24 @@ const MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES = new Map([
     ['blenderbot-small', ['BlenderbotSmallForConditionalGeneration', BlenderbotSmallForConditionalGeneration]],
 ]);
 
-const MODEL_WITH_LM_HEAD_MAPPING_NAMES = new Map([
+const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([
     ['bloom', ['BloomForCausalLM', BloomForCausalLM]],
     ['gpt2', ['GPT2LMHeadModel', GPT2LMHeadModel]],
+    ['jais', ['JAISLMHeadModel', JAISLMHeadModel]],
     ['gptj', ['GPTJForCausalLM', GPTJForCausalLM]],
     ['gpt_bigcode', ['GPTBigCodeForCausalLM', GPTBigCodeForCausalLM]],
     ['gpt_neo', ['GPTNeoForCausalLM', GPTNeoForCausalLM]],
     ['gpt_neox', ['GPTNeoXForCausalLM', GPTNeoXForCausalLM]],
     ['codegen', ['CodeGenForCausalLM', CodeGenForCausalLM]],
     ['llama', ['LlamaForCausalLM', LlamaForCausalLM]],
+    ['granite', ['GraniteForCausalLM', GraniteForCausalLM]],
+    ['cohere', ['CohereForCausalLM', CohereForCausalLM]],
+    ['gemma', ['GemmaForCausalLM', GemmaForCausalLM]],
+    ['gemma2', ['Gemma2ForCausalLM', Gemma2ForCausalLM]],
+    ['openelm', ['OpenELMForCausalLM', OpenELMForCausalLM]],
     ['qwen2', ['Qwen2ForCausalLM', Qwen2ForCausalLM]],
     ['phi', ['PhiForCausalLM', PhiForCausalLM]],
+    ['phi3', ['Phi3ForCausalLM', Phi3ForCausalLM]],
     ['mpt', ['MptForCausalLM', MptForCausalLM]],
     ['opt', ['OPTForCausalLM', OPTForCausalLM]],
     ['mbart', ['MBartForCausalLM', MBartForCausalLM]],
@@ -5777,17 +6268,26 @@ const MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES = new Map([
     ['vision-encoder-decoder', ['VisionEncoderDecoderModel', VisionEncoderDecoderModel]],
 ]);
 
+const MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES = new Map([
+    ['llava', ['LlavaForConditionalGeneration', LlavaForConditionalGeneration]],
+    ['moondream1', ['Moondream1ForConditionalGeneration', Moondream1ForConditionalGeneration]],
+    ['florence2', ['Florence2ForConditionalGeneration', Florence2ForConditionalGeneration]],
+]);
+
 const MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
     ['vision-encoder-decoder', ['VisionEncoderDecoderModel', VisionEncoderDecoderModel]],
 ]);
 
 const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
     ['vit', ['ViTForImageClassification', ViTForImageClassification]],
+    ['pvt', ['PvtForImageClassification', PvtForImageClassification]],
+    ['vit_msn', ['ViTMSNForImageClassification', ViTMSNForImageClassification]],
     ['fastvit', ['FastViTForImageClassification', FastViTForImageClassification]],
     ['mobilevit', ['MobileViTForImageClassification', MobileViTForImageClassification]],
     ['mobilevitv2', ['MobileViTV2ForImageClassification', MobileViTV2ForImageClassification]],
     ['beit', ['BeitForImageClassification', BeitForImageClassification]],
     ['deit', ['DeiTForImageClassification', DeiTForImageClassification]],
+    ['hiera', ['HieraForImageClassification', HieraForImageClassification]],
     ['convnext', ['ConvNextForImageClassification', ConvNextForImageClassification]],
     ['convnextv2', ['ConvNextV2ForImageClassification', ConvNextV2ForImageClassification]],
     ['dinov2', ['Dinov2ForImageClassification', Dinov2ForImageClassification]],
@@ -5795,10 +6295,15 @@ const MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES = new Map([
     ['swin', ['SwinForImageClassification', SwinForImageClassification]],
     ['segformer', ['SegformerForImageClassification', SegformerForImageClassification]],
     ['efficientnet', ['EfficientNetForImageClassification', EfficientNetForImageClassification]],
+    ['mobilenet_v1', ['MobileNetV1ForImageClassification', MobileNetV1ForImageClassification]],
+    ['mobilenet_v2', ['MobileNetV2ForImageClassification', MobileNetV2ForImageClassification]],
+    ['mobilenet_v3', ['MobileNetV3ForImageClassification', MobileNetV3ForImageClassification]],
+    ['mobilenet_v4', ['MobileNetV4ForImageClassification', MobileNetV4ForImageClassification]],
 ]);
 
 const MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES = new Map([
     ['detr', ['DetrForObjectDetection', DetrForObjectDetection]],
+    ['rt_detr', ['RTDetrForObjectDetection', RTDetrForObjectDetection]],
     ['table-transformer', ['TableTransformerForObjectDetection', TableTransformerForObjectDetection]],
     ['yolos', ['YolosForObjectDetection', YolosForObjectDetection]],
 ]);
@@ -5809,12 +6314,19 @@ const MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES = new Map([
 ]);
 
 const MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES = new Map([
+    // TODO: Do not add new models here
     ['detr', ['DetrForSegmentation', DetrForSegmentation]],
     ['clipseg', ['CLIPSegForImageSegmentation', CLIPSegForImageSegmentation]],
 ]);
 
 const MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES = new Map([
     ['segformer', ['SegformerForSemanticSegmentation', SegformerForSemanticSegmentation]],
+    ['sapiens', ['SapiensForSemanticSegmentation', SapiensForSemanticSegmentation]],
+]);
+
+const MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES = new Map([
+    ['detr', ['DetrForSegmentation', DetrForSegmentation]],
+    ['maskformer', ['MaskFormerForInstanceSegmentation', MaskFormerForInstanceSegmentation]],
 ]);
 
 const MODEL_FOR_MASK_GENERATION_MAPPING_NAMES = new Map([
@@ -5848,6 +6360,7 @@ const MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING_NAMES = new Map([
     ['unispeech-sat', ['UniSpeechSatForAudioFrameClassification', UniSpeechSatForAudioFrameClassification]],
     ['wavlm', ['WavLMForAudioFrameClassification', WavLMForAudioFrameClassification]],
     ['wav2vec2', ['Wav2Vec2ForAudioFrameClassification', Wav2Vec2ForAudioFrameClassification]],
+    ['pyannote', ['PyAnnoteForAudioFrameClassification', PyAnnoteForAudioFrameClassification]],
 ]);
 
 const MODEL_FOR_IMAGE_MATTING_MAPPING_NAMES = new Map([
@@ -5862,6 +6375,12 @@ const MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES = new Map([
     ['dpt', ['DPTForDepthEstimation', DPTForDepthEstimation]],
     ['depth_anything', ['DepthAnythingForDepthEstimation', DepthAnythingForDepthEstimation]],
     ['glpn', ['GLPNForDepthEstimation', GLPNForDepthEstimation]],
+    ['sapiens', ['SapiensForDepthEstimation', SapiensForDepthEstimation]],
+    ['depth_pro', ['DepthProForDepthEstimation', DepthProForDepthEstimation]],
+])
+
+const MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES = new Map([
+    ['sapiens', ['SapiensForNormalEstimation', SapiensForNormalEstimation]],
 ])
 
 // NOTE: This is custom to Transformers.js, and is necessary because certain models
@@ -5879,16 +6398,19 @@ const MODEL_CLASS_TYPE_MAPPING = [
     [MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
     [MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES, MODEL_TYPES.Seq2Seq],
     [MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES, MODEL_TYPES.Seq2Seq],
-    [MODEL_WITH_LM_HEAD_MAPPING_NAMES, MODEL_TYPES.DecoderOnly],
+    [MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_TYPES.DecoderOnly],
     [MODEL_FOR_MASKED_LM_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
     [MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
     [MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES, MODEL_TYPES.Vision2Seq],
+    [MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES, MODEL_TYPES.ImageTextToText],
     [MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
     [MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+    [MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
     [MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
     [MODEL_FOR_IMAGE_MATTING_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
     [MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
     [MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
+    [MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
     [MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
     [MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES, MODEL_TYPES.EncoderOnly],
     [MODEL_FOR_MASK_GENERATION_MAPPING_NAMES, MODEL_TYPES.MaskGeneration],
@@ -5913,6 +6435,10 @@ for (const [mappings, type] of MODEL_CLASS_TYPE_MAPPING) {
 }
 
 const CUSTOM_MAPPING = [
+    // OVERRIDE:
+    // TODO: Refactor to allow class to specify model
+    ['MusicgenForConditionalGeneration', MusicgenForConditionalGeneration, MODEL_TYPES.Musicgen],
+
     ['CLIPTextModelWithProjection', CLIPTextModelWithProjection, MODEL_TYPES.EncoderOnly],
     ['SiglipTextModel', SiglipTextModel, MODEL_TYPES.EncoderOnly],
     ['ClapTextModelWithProjection', ClapTextModelWithProjection, MODEL_TYPES.EncoderOnly],
@@ -5930,7 +6456,7 @@ for (const [name, model, type] of CUSTOM_MAPPING) {
  * The chosen model class is determined by the type specified in the model config.
  * 
  * @example
- * let model = await AutoModel.from_pretrained('bert-base-uncased');
+ * let model = await AutoModel.from_pretrained('Xenova/bert-base-uncased');
  */
 export class AutoModel extends PretrainedMixin {
     /** @type {Map<string, Object>[]} */
@@ -5944,7 +6470,7 @@ export class AutoModel extends PretrainedMixin {
  * The chosen model class is determined by the type specified in the model config.
  * 
  * @example
- * let model = await AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased-finetuned-sst-2-english');
+ * let model = await AutoModelForSequenceClassification.from_pretrained('Xenova/distilbert-base-uncased-finetuned-sst-2-english');
  */
 export class AutoModelForSequenceClassification extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES];
@@ -5955,7 +6481,7 @@ export class AutoModelForSequenceClassification extends PretrainedMixin {
  * The chosen model class is determined by the type specified in the model config.
  * 
  * @example
- * let model = await AutoModelForTokenClassification.from_pretrained('Davlan/distilbert-base-multilingual-cased-ner-hrl');
+ * let model = await AutoModelForTokenClassification.from_pretrained('Xenova/distilbert-base-multilingual-cased-ner-hrl');
  */
 export class AutoModelForTokenClassification extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES];
@@ -5966,7 +6492,7 @@ export class AutoModelForTokenClassification extends PretrainedMixin {
  * The chosen model class is determined by the type specified in the model config.
  * 
  * @example
- * let model = await AutoModelForSeq2SeqLM.from_pretrained('t5-small');
+ * let model = await AutoModelForSeq2SeqLM.from_pretrained('Xenova/t5-small');
  */
 export class AutoModelForSeq2SeqLM extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES];
@@ -6010,10 +6536,10 @@ export class AutoModelForTextToWaveform extends PretrainedMixin {
  * The chosen model class is determined by the type specified in the model config.
  * 
  * @example
- * let model = await AutoModelForCausalLM.from_pretrained('gpt2');
+ * let model = await AutoModelForCausalLM.from_pretrained('Xenova/gpt2');
  */
 export class AutoModelForCausalLM extends PretrainedMixin {
-    static MODEL_CLASS_MAPPINGS = [MODEL_WITH_LM_HEAD_MAPPING_NAMES];
+    static MODEL_CLASS_MAPPINGS = [MODEL_FOR_CAUSAL_LM_MAPPING_NAMES];
 }
 
 /**
@@ -6021,7 +6547,7 @@ export class AutoModelForCausalLM extends PretrainedMixin {
  * The chosen model class is determined by the type specified in the model config.
  * 
  * @example
- * let model = await AutoModelForMaskedLM.from_pretrained('bert-base-uncased');
+ * let model = await AutoModelForMaskedLM.from_pretrained('Xenova/bert-base-uncased');
  */
 export class AutoModelForMaskedLM extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_MASKED_LM_MAPPING_NAMES];
@@ -6032,7 +6558,7 @@ export class AutoModelForMaskedLM extends PretrainedMixin {
  * The chosen model class is determined by the type specified in the model config.
  * 
  * @example
- * let model = await AutoModelForQuestionAnswering.from_pretrained('distilbert-base-cased-distilled-squad');
+ * let model = await AutoModelForQuestionAnswering.from_pretrained('Xenova/distilbert-base-cased-distilled-squad');
  */
 export class AutoModelForQuestionAnswering extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES];
@@ -6043,7 +6569,7 @@ export class AutoModelForQuestionAnswering extends PretrainedMixin {
  * The chosen model class is determined by the type specified in the model config.
  * 
  * @example
- * let model = await AutoModelForVision2Seq.from_pretrained('nlpconnect/vit-gpt2-image-captioning');
+ * let model = await AutoModelForVision2Seq.from_pretrained('Xenova/vit-gpt2-image-captioning');
  */
 export class AutoModelForVision2Seq extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES];
@@ -6054,7 +6580,7 @@ export class AutoModelForVision2Seq extends PretrainedMixin {
  * The chosen model class is determined by the type specified in the model config.
  * 
  * @example
- * let model = await AutoModelForImageClassification.from_pretrained('google/vit-base-patch16-224');
+ * let model = await AutoModelForImageClassification.from_pretrained('Xenova/vit-base-patch16-224');
  */
 export class AutoModelForImageClassification extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES];
@@ -6065,7 +6591,7 @@ export class AutoModelForImageClassification extends PretrainedMixin {
  * The chosen model class is determined by the type specified in the model config.
  * 
  * @example
- * let model = await AutoModelForImageSegmentation.from_pretrained('facebook/detr-resnet-50-panoptic');
+ * let model = await AutoModelForImageSegmentation.from_pretrained('Xenova/detr-resnet-50-panoptic');
  */
 export class AutoModelForImageSegmentation extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES];
@@ -6082,12 +6608,23 @@ export class AutoModelForSemanticSegmentation extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES];
 }
 
+/**
+ * Helper class which is used to instantiate pretrained universal image segmentation models with the `from_pretrained` function.
+ * The chosen model class is determined by the type specified in the model config.
+ * 
+ * @example
+ * let model = await AutoModelForUniversalSegmentation.from_pretrained('hf-internal-testing/tiny-random-MaskFormerForInstanceSegmentation');
+ */
+export class AutoModelForUniversalSegmentation extends PretrainedMixin {
+    static MODEL_CLASS_MAPPINGS = [MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMES];
+}
+
 /**
  * Helper class which is used to instantiate pretrained object detection models with the `from_pretrained` function.
  * The chosen model class is determined by the type specified in the model config.
  * 
  * @example
- * let model = await AutoModelForObjectDetection.from_pretrained('facebook/detr-resnet-50');
+ * let model = await AutoModelForObjectDetection.from_pretrained('Xenova/detr-resnet-50');
  */
 export class AutoModelForObjectDetection extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES];
@@ -6141,6 +6678,10 @@ export class AutoModelForDepthEstimation extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES];
 }
 
+export class AutoModelForNormalEstimation extends PretrainedMixin {
+    static MODEL_CLASS_MAPPINGS = [MODEL_FOR_NORMAL_ESTIMATION_MAPPING_NAMES];
+}
+
 export class AutoModelForImageFeatureExtraction extends PretrainedMixin {
     static MODEL_CLASS_MAPPINGS = [MODEL_FOR_IMAGE_FEATURE_EXTRACTION_MAPPING_NAMES];
 }
diff --git a/src/models/whisper/common_whisper.js b/src/models/whisper/common_whisper.js
new file mode 100644
index 000000000..df4cce4d5
--- /dev/null
+++ b/src/models/whisper/common_whisper.js
@@ -0,0 +1,151 @@
+
+
+const WHISPER_LANGUAGES = [
+    ["en", "english"],
+    ["zh", "chinese"],
+    ["de", "german"],
+    ["es", "spanish"],
+    ["ru", "russian"],
+    ["ko", "korean"],
+    ["fr", "french"],
+    ["ja", "japanese"],
+    ["pt", "portuguese"],
+    ["tr", "turkish"],
+    ["pl", "polish"],
+    ["ca", "catalan"],
+    ["nl", "dutch"],
+    ["ar", "arabic"],
+    ["sv", "swedish"],
+    ["it", "italian"],
+    ["id", "indonesian"],
+    ["hi", "hindi"],
+    ["fi", "finnish"],
+    ["vi", "vietnamese"],
+    ["he", "hebrew"],
+    ["uk", "ukrainian"],
+    ["el", "greek"],
+    ["ms", "malay"],
+    ["cs", "czech"],
+    ["ro", "romanian"],
+    ["da", "danish"],
+    ["hu", "hungarian"],
+    ["ta", "tamil"],
+    ["no", "norwegian"],
+    ["th", "thai"],
+    ["ur", "urdu"],
+    ["hr", "croatian"],
+    ["bg", "bulgarian"],
+    ["lt", "lithuanian"],
+    ["la", "latin"],
+    ["mi", "maori"],
+    ["ml", "malayalam"],
+    ["cy", "welsh"],
+    ["sk", "slovak"],
+    ["te", "telugu"],
+    ["fa", "persian"],
+    ["lv", "latvian"],
+    ["bn", "bengali"],
+    ["sr", "serbian"],
+    ["az", "azerbaijani"],
+    ["sl", "slovenian"],
+    ["kn", "kannada"],
+    ["et", "estonian"],
+    ["mk", "macedonian"],
+    ["br", "breton"],
+    ["eu", "basque"],
+    ["is", "icelandic"],
+    ["hy", "armenian"],
+    ["ne", "nepali"],
+    ["mn", "mongolian"],
+    ["bs", "bosnian"],
+    ["kk", "kazakh"],
+    ["sq", "albanian"],
+    ["sw", "swahili"],
+    ["gl", "galician"],
+    ["mr", "marathi"],
+    ["pa", "punjabi"],
+    ["si", "sinhala"],
+    ["km", "khmer"],
+    ["sn", "shona"],
+    ["yo", "yoruba"],
+    ["so", "somali"],
+    ["af", "afrikaans"],
+    ["oc", "occitan"],
+    ["ka", "georgian"],
+    ["be", "belarusian"],
+    ["tg", "tajik"],
+    ["sd", "sindhi"],
+    ["gu", "gujarati"],
+    ["am", "amharic"],
+    ["yi", "yiddish"],
+    ["lo", "lao"],
+    ["uz", "uzbek"],
+    ["fo", "faroese"],
+    ["ht", "haitian creole"],
+    ["ps", "pashto"],
+    ["tk", "turkmen"],
+    ["nn", "nynorsk"],
+    ["mt", "maltese"],
+    ["sa", "sanskrit"],
+    ["lb", "luxembourgish"],
+    ["my", "myanmar"],
+    ["bo", "tibetan"],
+    ["tl", "tagalog"],
+    ["mg", "malagasy"],
+    ["as", "assamese"],
+    ["tt", "tatar"],
+    ["haw", "hawaiian"],
+    ["ln", "lingala"],
+    ["ha", "hausa"],
+    ["ba", "bashkir"],
+    ["jw", "javanese"],
+    ["su", "sundanese"],
+]
+
+// @ts-ignore
+export const WHISPER_LANGUAGE_MAPPING = new Map(WHISPER_LANGUAGES);
+// @ts-ignore
+export const WHISPER_TO_LANGUAGE_CODE_MAPPING = new Map([
+    ...WHISPER_LANGUAGES.map(([k, v]) => [v, k]),
+    ...[
+        ["burmese", "my"],
+        ["valencian", "ca"],
+        ["flemish", "nl"],
+        ["haitian", "ht"],
+        ["letzeburgesch", "lb"],
+        ["pushto", "ps"],
+        ["panjabi", "pa"],
+        ["moldavian", "ro"],
+        ["moldovan", "ro"],
+        ["sinhalese", "si"],
+        ["castilian", "es"],
+    ]
+]);
+
+/**
+ * @param {string} language The language name or code
+ * @returns {string} The language code
+ */
+export function whisper_language_to_code(language) {
+    language = language.toLowerCase();
+
+    // Map to code from user-friendly name (e.g., "english" -> "en")
+    let language_code = WHISPER_TO_LANGUAGE_CODE_MAPPING.get(language);
+
+    if (language_code === undefined) {
+        // User provided something that is not a language name
+
+        if (WHISPER_LANGUAGE_MAPPING.has(language)) {
+            // User provided the language code directly (e.g., "en")
+            language_code = language;
+
+        } else {
+            // User provided something that is not a language code or name
+            const is_language_code = language.length === 2;
+            const langs = is_language_code ? WHISPER_LANGUAGE_MAPPING.keys() : WHISPER_LANGUAGE_MAPPING.values();
+
+            throw new Error(`Language "${language}" is not supported. Must be one of: ${JSON.stringify(langs)}`);
+        }
+    }
+    return language_code;
+}
diff --git a/src/models/whisper/generation_whisper.js b/src/models/whisper/generation_whisper.js
new file mode 100644
index 000000000..690455ff7
--- /dev/null
+++ b/src/models/whisper/generation_whisper.js
@@ -0,0 +1,89 @@
+import { GenerationConfig } from "../../generation/configuration_utils.js";
+
+export class WhisperGenerationConfig extends GenerationConfig {
+
+    /**
+     * Whether to return the timestamps with the text. This enables the `WhisperTimestampsLogitsProcessor`.
+     * @type {boolean}
+     */
+    return_timestamps = null;
+
+    /**
+     * Whether to return token-level timestamps
+     * with the text. This can be used with or without the `return_timestamps` option. To get word-level
+     * timestamps, use the tokenizer to group the tokens into words.
+     * @type {boolean}
+     */
+    return_token_timestamps = null;
+
+    /**
+     * The number of audio frames available in this chunk. This is only used generating word-level timestamps.
+     * @type {number}
+     */
+    num_frames = null;
+
+    /**
+     * Alignment heads to predict word-level timestamps. This is a list of [layer, head] pairs that
+     * select the cross-attention heads that are highly correlated to word-level timing.
+     * @type {[number, number][]}
+     */
+    alignment_heads = null;
+
+    /**
+     * Task to use for generation, either "translate" or "transcribe".
+     * @type {string}
+     */
+    task = null;
+
+    /**
+     * Language token to use for generation, can be either in the form of `<|en|>`, `en` or `english`.
+     * You can find all the possible language tokens in the `model.generation_config.lang_to_id` dictionary.
+     * @type {string}
+     */
+    language = null;
+
+    /**
+     * The id of the `"<|notimestamps|>"` token.
+     * @type {number}
+     */
+    no_timestamps_token_id = null;
+
+    /**
+     * Rank-1 list of token IDs created by passing text to [`~WhisperProcessor.get_prompt_ids`] that is
+     * provided as a prompt to each chunk. This can be used to provide or "prompt-engineer" a context for
+     * transcription, e.g. custom vocabularies or proper nouns to make it more likely to predict those words
+     * correctly. It cannot be used in conjunction with `decoder_start_token_id` as it overwrites this value.
+     * @type {number[]}
+     */
+    prompt_ids = null;
+
+    /**
+     * Whether the model is multilingual or not.
+     * @type {boolean}
+     */
+    is_multilingual = null;
+
+    /**
+     * (Optional) A mapping from language tokens to their corresponding IDs.
+     * Only required if the model is multilingual.
+     * @type {Record<string, number>|null}
+     */
+    lang_to_id = null;
+
+    /**
+     * (Optional) A mapping from task tokens to their corresponding IDs.
+     * @type {Record<string, number>|null}
+     */
+    task_to_id = null;
+
+    /**
+     * Used to set the maximum value of the initial timestamp. This is used to prevent the model from
+     * predicting timestamps that are too far in the future.
+     * @type {number}
+     */
+    max_initial_timestamp_index = 1;
+}
+
+/**
+ * @typedef {import('../../generation/parameters.js').GenerationFunctionParameters & {generation_config: WhisperGenerationConfig} & WhisperGenerationConfig} WhisperGenerationFunctionParameters
+ */
diff --git a/src/ops/registry.js b/src/ops/registry.js
new file mode 100644
index 000000000..9b65fa4a8
--- /dev/null
+++ b/src/ops/registry.js
@@ -0,0 +1,103 @@
+import { createInferenceSession } from "../backends/onnx.js";
+import { Tensor } from "../utils/tensor.js";
+
+/**
+ * Asynchronously creates a wrapper function for running an ONNX inference session.
+ *
+ * @param {number[]} session_bytes The session data in bytes.
+ * @param {import('onnxruntime-common').InferenceSession.SessionOptions} session_options The options for the ONNX session.
+ * @template {string | [string] | string[]} T
+ * @param {T} names The name(s) of the output tensor(s).
+ * 
+ * @returns {Promise<function(Record<string, Tensor>): Promise<T extends string ? Tensor : T extends string[] ? { [K in keyof T]: Tensor } : never>>}
+ * The wrapper function for running the ONNX inference session.
+ */
+const wrap = async (session_bytes, session_options, names) => {
+    const session = await createInferenceSession(
+        new Uint8Array(session_bytes), session_options,
+    );
+    return /** @type {any} */(async (/** @type {Record<string, Tensor>} */ inputs) => {
+        const ortFeed = Object.fromEntries(Object.entries(inputs).map(([k, v]) => [k, v.ort_tensor]));
+        const outputs = await session.run(ortFeed);
+
+        if (Array.isArray(names)) {
+            return names.map((n) => new Tensor(outputs[n]));
+        } else {
+            return new Tensor(outputs[/** @type {string} */(names)]);
+        }
+    })
+}
+
+// In-memory registry of initialized ONNX operators
+export class TensorOpRegistry {
+    static session_options = {
+        // TODO: Allow for multiple execution providers
+        // executionProviders: ['webgpu'],
+    };
+
+    static get bilinear_interpolate_4d() {
+        if (!this._bilinear_interpolate_4d) {
+            this._bilinear_interpolate_4d = wrap(
+                [8, 9, 18, 0, 58, 128, 1, 10, 40, 10, 1, 120, 10, 0, 10, 0, 10, 1, 115, 18, 1, 121, 34, 6, 82, 101, 115, 105, 122, 101, 42, 17, 10, 4, 109, 111, 100, 101, 34, 6, 108, 105, 110, 101, 97, 114, 160, 1, 3, 18, 1, 114, 90, 31, 10, 1, 120, 18, 26, 10, 24, 8, 1, 18, 20, 10, 3, 18, 1, 98, 10, 3, 18, 1, 99, 10, 3, 18, 1, 104, 10, 3, 18, 1, 119, 90, 15, 10, 1, 115, 18, 10, 10, 8, 8, 7, 18, 4, 10, 2, 8, 4, 98, 31, 10, 1, 121, 18, 26, 10, 24, 8, 1, 18, 20, 10, 3, 18, 1, 98, 10, 3, 18, 1, 99, 10, 3, 18, 1, 104, 10, 3, 18, 1, 119, 66, 2, 16, 20],
+                this.session_options,
+                'y',
+            );
+        }
+        return this._bilinear_interpolate_4d;
+    }
+
+    static get bicubic_interpolate_4d() {
+        if (!this._bicubic_interpolate_4d) {
+            this._bicubic_interpolate_4d = wrap(
+                [8, 9, 18, 0, 58, 127, 10, 39, 10, 1, 120, 10, 0, 10, 0, 10, 1, 115, 18, 1, 121, 34, 6, 82, 101, 115, 105, 122, 101, 42, 16, 10, 4, 109, 111, 100, 101, 34, 5, 99, 117, 98, 105, 99, 160, 1, 3, 18, 1, 114, 90, 31, 10, 1, 120, 18, 26, 10, 24, 8, 1, 18, 20, 10, 3, 18, 1, 98, 10, 3, 18, 1, 99, 10, 3, 18, 1, 104, 10, 3, 18, 1, 119, 90, 15, 10, 1, 115, 18, 10, 10, 8, 8, 7, 18, 4, 10, 2, 8, 4, 98, 31, 10, 1, 121, 18, 26, 10, 24, 8, 1, 18, 20, 10, 3, 18, 1, 98, 10, 3, 18, 1, 99, 10, 3, 18, 1, 104, 10, 3, 18, 1, 119, 66, 2, 16, 20],
+                this.session_options,
+                'y',
+            );
+        }
+        return this._bicubic_interpolate_4d;
+    }
+
+    static get matmul() {
+        if (!this._matmul) {
+            this._matmul = wrap(
+                [8, 9, 18, 0, 58, 55, 10, 17, 10, 1, 97, 10, 1, 98, 18, 1, 99, 34, 6, 77, 97, 116, 77, 117, 108, 18, 1, 114, 90, 9, 10, 1, 97, 18, 4, 10, 2, 8, 1, 90, 9, 10, 1, 98, 18, 4, 10, 2, 8, 1, 98, 9, 10, 1, 99, 18, 4, 10, 2, 8, 1, 66, 2, 16, 20],
+                this.session_options,
+                'c',
+            );
+        }
+        return this._matmul;
+    }
+
+    static get stft() {
+        if (!this._stft) {
+            this._stft = wrap(
+                [8, 7, 18, 0, 58, 148, 1, 10, 38, 10, 1, 115, 10, 1, 106, 10, 1, 119, 10, 1, 108, 18, 1, 111, 34, 4, 83, 84, 70, 84, 42, 15, 10, 8, 111, 110, 101, 115, 105, 100, 101, 100, 24, 1, 160, 1, 2, 18, 1, 115, 90, 26, 10, 1, 115, 18, 21, 10, 19, 8, 1, 18, 15, 10, 3, 18, 1, 98, 10, 3, 18, 1, 115, 10, 3, 18, 1, 99, 90, 11, 10, 1, 106, 18, 6, 10, 4, 8, 7, 18, 0, 90, 16, 10, 1, 119, 18, 11, 10, 9, 8, 1, 18, 5, 10, 3, 18, 1, 119, 90, 11, 10, 1, 108, 18, 6, 10, 4, 8, 7, 18, 0, 98, 31, 10, 1, 111, 18, 26, 10, 24, 8, 1, 18, 20, 10, 3, 18, 1, 98, 10, 3, 18, 1, 102, 10, 3, 18, 1, 100, 10, 3, 18, 1, 99, 66, 2, 16, 17],
+                this.session_options,
+                'o',
+            )
+        }
+        return this._stft;
+    }
+
+    static get rfft() {
+        if (!this._rfft) {
+            this._rfft = wrap(
+                [8, 9, 18, 0, 58, 97, 10, 33, 10, 1, 120, 10, 0, 10, 1, 97, 18, 1, 121, 34, 3, 68, 70, 84, 42, 15, 10, 8, 111, 110, 101, 115, 105, 100, 101, 100, 24, 1, 160, 1, 2, 18, 1, 100, 90, 21, 10, 1, 120, 18, 16, 10, 14, 8, 1, 18, 10, 10, 3, 18, 1, 115, 10, 3, 18, 1, 99, 90, 11, 10, 1, 97, 18, 6, 10, 4, 8, 7, 18, 0, 98, 21, 10, 1, 121, 18, 16, 10, 14, 8, 1, 18, 10, 10, 3, 18, 1, 115, 10, 3, 18, 1, 99, 66, 2, 16, 20],
+                this.session_options,
+                'y',
+            )
+        }
+        return this._rfft;
+    }
+
+    static get top_k() {
+        if (!this._top_k) {
+            this._top_k = wrap(
+                [8, 10, 18, 0, 58, 73, 10, 18, 10, 1, 120, 10, 1, 107, 18, 1, 118, 18, 1, 105, 34, 4, 84, 111, 112, 75, 18, 1, 116, 90, 9, 10, 1, 120, 18, 4, 10, 2, 8, 1, 90, 15, 10, 1, 107, 18, 10, 10, 8, 8, 7, 18, 4, 10, 2, 8, 1, 98, 9, 10, 1, 118, 18, 4, 10, 2, 8, 1, 98, 9, 10, 1, 105, 18, 4, 10, 2, 8, 7, 66, 2, 16, 21],
+                this.session_options,
+                [ /* Values */ 'v', /* Indices */ 'i']
+            )
+        }
+        return this._top_k;
+    }
+}
diff --git a/src/pipelines.js b/src/pipelines.js
index c7772aa55..d955803e6 100644
--- a/src/pipelines.js
+++ b/src/pipelines.js
@@ -3,7 +3,7 @@
  * 
  * **Example:** Instantiate pipeline using the `pipeline` function.
  * ```javascript
- * import { pipeline } from '@xenova/transformers';
+ * import { pipeline } from '@huggingface/transformers';
  * 
  * const classifier = await pipeline('sentiment-analysis');
  * const output = await classifier('I love transformers!');
@@ -34,6 +34,7 @@ import {
     AutoModelForImageClassification,
     AutoModelForImageSegmentation,
     AutoModelForSemanticSegmentation,
+    AutoModelForUniversalSegmentation,
     AutoModelForObjectDetection,
     AutoModelForZeroShotObjectDetection,
     AutoModelForDocumentQuestionAnswering,
@@ -47,9 +48,11 @@ import {
     Processor
 } from './processors.js';
 
-
 import {
     Callable,
+} from './utils/generic.js';
+
+import {
     dispatchCallback,
     pop,
     product,
@@ -57,7 +60,6 @@ import {
 import {
     softmax,
     max,
-    getTopItems,
     round,
 } from './utils/maths.js';
 import {
@@ -68,6 +70,7 @@ import {
     mean_pooling,
     interpolate,
     quantize_embeddings,
+    topk,
 } from './utils/tensor.js';
 import { RawImage } from './utils/image.js';
 
@@ -218,7 +221,7 @@ export class Pipeline extends Callable {
  * @typedef {TextClassificationSingle[]} TextClassificationOutput
  * 
  * @typedef {Object} TextClassificationPipelineOptions Parameters specific to text classification pipelines.
- * @property {number} [topk=1] The number of top predictions to be returned.
+ * @property {number} [top_k=1] The number of top predictions to be returned.
  * 
  * @callback TextClassificationPipelineCallback Classify the text(s) given as inputs.
  * @param {string|string[]} texts The input text(s) to be classified.
@@ -241,7 +244,7 @@ export class Pipeline extends Callable {
  * **Example:** Multilingual sentiment-analysis w/ `Xenova/bert-base-multilingual-uncased-sentiment` (and return top 5 classes).
  * ```javascript
  * const classifier = await pipeline('sentiment-analysis', 'Xenova/bert-base-multilingual-uncased-sentiment');
- * const output = await classifier('Le meilleur film de tous les temps.', { topk: 5 });
+ * const output = await classifier('Le meilleur film de tous les temps.', { top_k: 5 });
  * // [
  * //   { label: '5 stars', score: 0.9610759615898132 },
  * //   { label: '4 stars', score: 0.03323351591825485 },
@@ -254,7 +257,7 @@ export class Pipeline extends Callable {
  * **Example:** Toxic comment classification w/ `Xenova/toxic-bert` (and return all classes).
  * ```javascript
  * const classifier = await pipeline('text-classification', 'Xenova/toxic-bert');
- * const output = await classifier('I hate you!', { topk: null });
+ * const output = await classifier('I hate you!', { top_k: null });
  * // [
  * //   { label: 'toxic', score: 0.9593140482902527 },
  * //   { label: 'insult', score: 0.16187334060668945 },
@@ -277,7 +280,7 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
 
     /** @type {TextClassificationPipelineCallback} */
     async _call(texts, {
-        topk = 1
+        top_k = 1
     } = {}) {
 
         // Run tokenization
@@ -292,28 +295,35 @@ export class TextClassificationPipeline extends (/** @type {new (options: TextPi
         // TODO: Use softmax tensor function
         const function_to_apply =
             this.model.config.problem_type === 'multi_label_classification'
-                ? batch => batch.sigmoid().data
-                : batch => softmax(batch.data); // single_label_classification (default)
+                ? batch => batch.sigmoid()
+                : batch => new Tensor(
+                    'float32',
+                    softmax(batch.data),
+                    batch.dims,
+                ); // single_label_classification (default)
 
         const id2label = this.model.config.id2label;
 
         const toReturn = [];
         for (const batch of outputs.logits) {
             const output = function_to_apply(batch);
-            const scores = getTopItems(output, topk);
 
-            const vals = scores.map(x => ({
-                label: id2label[x[0]],
-                score: x[1],
+            const scores = await topk(output, top_k);
+
+            const values = scores[0].tolist();
+            const indices = scores[1].tolist();
+            const vals = indices.map((x, i) => ({
+                label: id2label ? id2label[x] : `LABEL_${x}`,
+                score: values[i],
             }));
-            if (topk === 1) {
+            if (top_k === 1) {
                 toReturn.push(...vals);
             } else {
                 toReturn.push(vals);
             }
         }
 
-        return Array.isArray(texts) || topk === 1 ? /** @type {TextClassificationOutput} */ (toReturn) : /** @type {TextClassificationOutput[]} */ (toReturn)[0];
+        return Array.isArray(texts) || top_k === 1 ? /** @type {TextClassificationOutput} */ (toReturn) : /** @type {TextClassificationOutput[]} */ (toReturn)[0];
     }
 }
 
@@ -428,9 +438,9 @@ export class TokenClassificationPipeline extends (/** @type {new (options: TextP
                     index: j,
                     word: word,
 
-                    // TODO: null for now, but will add
-                    start: null,
-                    end: null,
+                    // TODO: Add support for start and end
+                    // start: null,
+                    // end: null,
                 });
             }
             toReturn.push(tokens);
@@ -447,7 +457,7 @@ export class TokenClassificationPipeline extends (/** @type {new (options: TextP
  * @property {string} answer The answer to the question.
  * 
  * @typedef {Object} QuestionAnsweringPipelineOptions Parameters specific to question answering pipelines.
- * @property {number} [topk=1] The number of top answer predictions to be returned.
+ * @property {number} [top_k=1] The number of top answer predictions to be returned.
  * 
  * @callback QuestionAnsweringPipelineCallback Answer the question(s) given as inputs by using the context(s).
  * @param {string|string[]} question One or several question(s) (must be used in conjunction with the `context` argument).
@@ -485,7 +495,7 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
 
     /** @type {QuestionAnsweringPipelineCallback} */
     async _call(question, context, {
-        topk = 1
+        top_k = 1
     } = {}) {
 
         // Run tokenization
@@ -495,30 +505,70 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
             truncation: true,
         });
 
-        const output = await this.model(inputs);
+        const { start_logits, end_logits } = await this.model(inputs);
+        const input_ids = inputs.input_ids.tolist();
+        const attention_mask = inputs.attention_mask.tolist();
+
+        // TODO: add support for `return_special_tokens_mask`
+        const special_tokens = this.tokenizer.all_special_ids;
 
         /** @type {QuestionAnsweringOutput[]} */
         const toReturn = [];
-        for (let j = 0; j < output.start_logits.dims[0]; ++j) {
-            const ids = inputs.input_ids[j];
-            const sepIndex = ids.indexOf(this.tokenizer.sep_token_id);
+        for (let j = 0; j < start_logits.dims[0]; ++j) {
+            const ids = input_ids[j];
+            const sepIndex = ids.findIndex(x =>
+                // We use == to match bigint with number
+                // @ts-ignore
+                x == this.tokenizer.sep_token_id
+            );
 
-            const s1 = Array.from(softmax(output.start_logits[j].data))
-                .map((x, i) => [x, i])
-                .filter(x => x[1] > sepIndex);
-            const e1 = Array.from(softmax(output.end_logits[j].data))
-                .map((x, i) => [x, i])
-                .filter(x => x[1] > sepIndex);
 
-            const options = product(s1, e1)
+            const valid_mask = attention_mask[j].map((y, ix) => (
+                y == 1
+                && (
+                    ix === 0 // is cls_token
+                    || (
+                        ix > sepIndex
+                        && special_tokens.findIndex(x => x == ids[ix]) === -1 // token is not a special token (special_tokens_mask == 0)
+                    )
+                )
+            ));
+
+            const start = start_logits[j].tolist();
+            const end = end_logits[j].tolist();
+
+            // Now, we mask out values that can't be in the answer
+            // NOTE: We keep the cls_token unmasked (some models use it to indicate unanswerable questions)
+            for (let i = 1; i < start.length; ++i) {
+                if (
+                    attention_mask[j] == 0 // is part of padding
+                    || i <= sepIndex // is before the sep_token
+                    || special_tokens.findIndex(x => x == ids[i]) !== -1 // Is a special token
+                ) {
+                    // Make sure non-context indexes in the tensor cannot contribute to the softmax
+                    start[i] = -Infinity;
+                    end[i] = -Infinity;
+                }
+            }
+
+            // Normalize logits and spans to retrieve the answer
+            const start_scores = softmax(start).map((x, i) => [x, i]);
+            const end_scores = softmax(end).map((x, i) => [x, i]);
+
+            // Mask CLS
+            start_scores[0][0] = 0;
+            end_scores[0][0] = 0;
+
+            // Generate all valid spans and select best ones
+            const options = product(start_scores, end_scores)
                 .filter(x => x[0][1] <= x[1][1])
                 .map(x => [x[0][1], x[1][1], x[0][0] * x[1][0]])
                 .sort((a, b) => b[2] - a[2]);
 
-            for (let k = 0; k < Math.min(options.length, topk); ++k) {
+            for (let k = 0; k < Math.min(options.length, top_k); ++k) {
                 const [start, end, score] = options[k];
 
-                const answer_tokens = [...ids].slice(start, end + 1)
+                const answer_tokens = ids.slice(start, end + 1)
 
                 const answer = this.tokenizer.decode(answer_tokens, {
                     skip_special_tokens: true,
@@ -532,8 +582,8 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
             }
         }
 
-        // Mimic HF's return type based on topk
-        return (topk === 1) ? toReturn[0] : toReturn;
+        // Mimic HF's return type based on top_k
+        return (top_k === 1) ? toReturn[0] : toReturn;
     }
 }
 
@@ -547,7 +597,7 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
  * @typedef {FillMaskSingle[]} FillMaskOutput
  * 
  * @typedef {Object} FillMaskPipelineOptions Parameters specific to fill mask pipelines.
- * @property {number} [topk=5] When passed, overrides the number of predictions to return.
+ * @property {number} [top_k=5] When passed, overrides the number of predictions to return.
  * 
  * @callback FillMaskPipelineCallback Fill the masked token in the text(s) given as inputs.
  * @param {string|string[]} texts One or several texts (or one list of prompts) with masked tokens.
@@ -579,7 +629,7 @@ export class QuestionAnsweringPipeline extends (/** @type {new (options: TextPip
  * **Example:** Perform masked language modelling (a.k.a. "fill-mask") with `Xenova/bert-base-cased` (and return top result).
  * ```javascript
  * const unmasker = await pipeline('fill-mask', 'Xenova/bert-base-cased');
- * const output = await unmasker('The Milky Way is a [MASK] galaxy.', { topk: 1 });
+ * const output = await unmasker('The Milky Way is a [MASK] galaxy.', { top_k: 1 });
  * // [{ token_str: 'spiral', score: 0.6299987435340881, token: 14061, sequence: 'The Milky Way is a spiral galaxy.' }]
  * ```
  */
@@ -595,7 +645,7 @@ export class FillMaskPipeline extends (/** @type {new (options: TextPipelineCons
 
     /** @type {FillMaskPipelineCallback} */
     async _call(texts, {
-        topk = 5
+        top_k = 5
     } = {}) {
 
         // Run tokenization
@@ -605,30 +655,40 @@ export class FillMaskPipeline extends (/** @type {new (options: TextPipelineCons
         });
 
         // Run model
-        const outputs = await this.model(model_inputs)
+        const { logits } = await this.model(model_inputs)
 
         const toReturn = [];
 
-        for (let i = 0; i < model_inputs.input_ids.dims[0]; ++i) {
-            const ids = model_inputs.input_ids[i];
-            const mask_token_index = ids.indexOf(this.tokenizer.mask_token_id)
-
+        /** @type {bigint[][]} */
+        const input_ids = model_inputs.input_ids.tolist();
+        for (let i = 0; i < input_ids.length; ++i) {
+            const ids = input_ids[i];
+            const mask_token_index = ids.findIndex(x =>
+                // We use == to match bigint with number
+                // @ts-ignore
+                x == this.tokenizer.mask_token_id
+            );
             if (mask_token_index === -1) {
                 throw Error(`Mask token (${this.tokenizer.mask_token}) not found in text.`)
             }
-            const logits = outputs.logits[i];
-            const itemLogits = logits[mask_token_index];
+            const itemLogits = logits[i][mask_token_index];
 
-            const scores = getTopItems(softmax(itemLogits.data), topk);
+            const scores = await topk(new Tensor(
+                'float32',
+                softmax(itemLogits.data),
+                itemLogits.dims,
+            ), top_k);
+            const values = scores[0].tolist();
+            const indices = scores[1].tolist();
 
-            toReturn.push(scores.map(x => {
-                const sequence = [...ids];
-                sequence[mask_token_index] = x[0];
+            toReturn.push(indices.map((x, i) => {
+                const sequence = ids.slice();
+                sequence[mask_token_index] = x;
 
                 return {
-                    score: x[1],
-                    token: x[0],
-                    token_str: this.tokenizer.model.vocab[x[0]],
+                    score: values[i],
+                    token: Number(x),
+                    token_str: this.tokenizer.model.vocab[x],
                     sequence: this.tokenizer.decode(sequence, { skip_special_tokens: true }),
                 }
             }));
@@ -645,7 +705,7 @@ export class FillMaskPipeline extends (/** @type {new (options: TextPipelineCons
  * 
  * @callback Text2TextGenerationPipelineCallback Generate the output text(s) using text(s) given as inputs.
  * @param {string|string[]} texts Input text for the encoder.
- * @param {import('./utils/generation.js').GenerationConfigType} [options] Additional keyword arguments to pass along to the generate method of the model.
+ * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<Text2TextGenerationOutput|Text2TextGenerationOutput[]>}
  * 
  * @typedef {TextPipelineConstructorArgs & Text2TextGenerationPipelineCallback & Disposable} Text2TextGenerationPipelineType
@@ -703,20 +763,19 @@ export class Text2TextGenerationPipeline extends (/** @type {new (options: TextP
             padding: true,
             truncation: true,
         }
-        let input_ids;
+        let inputs;
         if (this instanceof TranslationPipeline && '_build_translation_inputs' in tokenizer) {
             // TODO: move to Translation pipeline?
             // Currently put here to avoid code duplication
             // @ts-ignore
-            input_ids = tokenizer._build_translation_inputs(texts, tokenizer_options, generate_kwargs).input_ids;
+            inputs = tokenizer._build_translation_inputs(texts, tokenizer_options, generate_kwargs);
 
         } else {
-            input_ids = tokenizer(texts, tokenizer_options).input_ids;
+            inputs = tokenizer(texts, tokenizer_options);
         }
 
-        const outputTokenIds = await this.model.generate(input_ids, generate_kwargs);
-
-        return tokenizer.batch_decode(outputTokenIds, {
+        const outputTokenIds = await this.model.generate({ ...inputs, ...generate_kwargs });
+        return tokenizer.batch_decode(/** @type {Tensor} */(outputTokenIds), {
             skip_special_tokens: true,
         }).map(text => ({ [this._key]: text }));
     }
@@ -730,7 +789,7 @@ export class Text2TextGenerationPipeline extends (/** @type {new (options: TextP
  * 
  * @callback SummarizationPipelineCallback Summarize the text(s) given as inputs.
  * @param {string|string[]} texts One or several articles (or one list of articles) to summarize.
- * @param {import('./utils/generation.js').GenerationConfigType} [options] Additional keyword arguments to pass along to the generate method of the model.
+ * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<SummarizationOutput|SummarizationOutput[]>}
  * 
  * @typedef {TextPipelineConstructorArgs & SummarizationPipelineCallback & Disposable} SummarizationPipelineType
@@ -777,7 +836,7 @@ export class SummarizationPipeline extends (/** @type {new (options: TextPipelin
  * 
  * @callback TranslationPipelineCallback Translate the text(s) given as inputs.
  * @param {string|string[]} texts Texts to be translated.
- * @param {import('./utils/generation.js').GenerationConfigType} [options] Additional keyword arguments to pass along to the generate method of the model.
+ * @param {import('./generation/configuration_utils.js').GenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<TranslationOutput|TranslationOutput[]>}
  * 
  * @typedef {TextPipelineConstructorArgs & TranslationPipelineCallback & Disposable} TranslationPipelineType
@@ -855,11 +914,11 @@ function isChat(x) {
  * @typedef {Object} TextGenerationSpecificParams Parameters specific to text-generation pipelines.
  * @property {boolean} [add_special_tokens] Whether or not to add special tokens when tokenizing the sequences.
  * @property {boolean} [return_full_text=true] If set to `false` only added text is returned, otherwise the full text is returned.
- * @typedef {import('./utils/generation.js').GenerationConfigType & TextGenerationSpecificParams} TextGenerationConfig
+ * @typedef {import('./generation/configuration_utils.js').GenerationConfig & TextGenerationSpecificParams} TextGenerationConfig
  * 
  * @callback TextGenerationPipelineCallback Complete the prompt(s) given as inputs.
  * @param {string|string[]|Chat|Chat[]} texts One or several prompts (or one list of prompts) to complete.
- * @param {TextGenerationConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
+ * @param {Partial<TextGenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<TextGenerationOutput|TextGenerationOutput[]>} An array or object containing the generated texts.
  * 
  * @typedef {TextPipelineConstructorArgs & TextGenerationPipelineCallback & Disposable} TextGenerationPipelineType
@@ -966,24 +1025,24 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
             : generate_kwargs.return_full_text ?? true;
 
         this.tokenizer.padding_side = 'left';
-        const { input_ids, attention_mask } = this.tokenizer(inputs, {
+        const text_inputs = this.tokenizer(inputs, {
             add_special_tokens,
             padding: true,
             truncation: true,
         });
 
-        const outputTokenIds = await this.model.generate(input_ids, generate_kwargs, null, {
-            inputs_attention_mask: attention_mask
-        });
+        const outputTokenIds = /** @type {Tensor} */(await this.model.generate({
+            ...text_inputs,
+            ...generate_kwargs
+        }));
 
-        let decoded = this.tokenizer.batch_decode(outputTokenIds, {
+        const decoded = this.tokenizer.batch_decode(outputTokenIds, {
             skip_special_tokens: true,
         });
 
-
         let promptLengths;
-        if (!return_full_text && input_ids.dims.at(-1) > 0) {
-            promptLengths = this.tokenizer.batch_decode(input_ids, {
+        if (!return_full_text && text_inputs.input_ids.dims.at(-1) > 0) {
+            promptLengths = this.tokenizer.batch_decode(text_inputs.input_ids, {
                 skip_special_tokens: true,
             }).map(x => x.length);
         }
@@ -991,7 +1050,7 @@ export class TextGenerationPipeline extends (/** @type {new (options: TextPipeli
         /** @type {TextGenerationOutput[]} */
         const toReturn = Array.from({ length: texts.length }, _ => []);
         for (let i = 0; i < decoded.length; ++i) {
-            const textIndex = Math.floor(i / outputTokenIds.length * texts.length);
+            const textIndex = Math.floor(i / outputTokenIds.dims[0] * texts.length);
 
             if (promptLengths) {
                 // Trim the decoded text to only include the generated part
@@ -1365,7 +1424,7 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
  * @typedef {AudioClassificationSingle[]} AudioClassificationOutput
  * 
  * @typedef {Object} AudioClassificationPipelineOptions Parameters specific to audio classification pipelines.
- * @property {number} [topk=null] The number of top labels that will be returned by the pipeline.
+ * @property {number} [top_k=5] The number of top labels that will be returned by the pipeline.
  * If the provided number is `null` or higher than the number of labels available in the model configuration,
  * it will default to the number of labels.
  * 
@@ -1400,7 +1459,7 @@ export class ImageFeatureExtractionPipeline extends (/** @type {new (options: Im
  * ```javascript
  * const classifier = await pipeline('audio-classification', 'Xenova/ast-finetuned-audioset-10-10-0.4593');
  * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cat_meow.wav';
- * const output = await classifier(url, { topk: 4 });
+ * const output = await classifier(url, { top_k: 4 });
  * // [
  * //   { label: 'Meow', score: 0.5617874264717102 },
  * //   { label: 'Cat', score: 0.22365376353263855 },
@@ -1421,11 +1480,9 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
 
     /** @type {AudioClassificationPipelineCallback} */
     async _call(audio, {
-        topk = null
+        top_k = 5
     } = {}) {
 
-        const single = !Array.isArray(audio);
-
         const sampling_rate = this.processor.feature_extractor.config.sampling_rate;
         const preparedAudios = await prepareAudios(audio, sampling_rate);
 
@@ -1437,20 +1494,23 @@ export class AudioClassificationPipeline extends (/** @type {new (options: Audio
             const output = await this.model(inputs);
             const logits = output.logits[0];
 
-            const scores = getTopItems(softmax(logits.data), topk);
+            const scores = await topk(new Tensor(
+                'float32',
+                softmax(logits.data),
+                logits.dims,
+            ), top_k);
 
-            const vals = scores.map(x => ({
-                label: /** @type {string} */ (id2label[x[0]]),
-                score: /** @type {number} */ (x[1]),
+            const values = scores[0].tolist();
+            const indices = scores[1].tolist();
+
+            const vals = indices.map((x, i) => ({
+                label: /** @type {string} */ (id2label ? id2label[x] : `LABEL_${x}`),
+                score: /** @type {number} */ (values[i]),
             }));
 
-            if (topk === 1) {
-                toReturn.push(...vals);
-            } else {
-                toReturn.push(vals);
-            }
-        }
-        return !single || topk === 1 ? /** @type {AudioClassificationOutput} */ (toReturn) : /** @type {AudioClassificationOutput[]} */ (toReturn)[0];
+            toReturn.push(vals);
+        };
+        return Array.isArray(audio) ? toReturn : toReturn[0];
     }
 }
 
@@ -1546,12 +1606,6 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
     }
 }
 
-/**
- * @typedef {{stride: number[], input_features: Tensor, is_last: boolean, tokens?: number[], token_timestamps?: number[]}} ChunkCallbackItem
- * @callback ChunkCallback
- * @param {ChunkCallbackItem} chunk The chunk to process.
- */
-
 /**
  * @typedef {Object} Chunk
  * @property {[number, number]} timestamp The start and end timestamp of the chunk in seconds.
@@ -1565,17 +1619,14 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
  * containing all the various text chunks identified by the model.
  * 
  * @typedef {Object} AutomaticSpeechRecognitionSpecificParams Parameters specific to automatic-speech-recognition pipelines.
- * @property {boolean|'word'} [kwargs.return_timestamps] Whether to return timestamps or not. Default is `false`.
- * @property {number} [kwargs.chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking).
- * @property {number} [kwargs.stride_length_s] The length of overlap between consecutive audio chunks in seconds. If not provided, defaults to `chunk_length_s / 6`.
- * @property {ChunkCallback} [kwargs.chunk_callback] Callback function to be called with each chunk processed.
- * @property {boolean} [kwargs.force_full_sequences] Whether to force outputting full sequences or not. Default is `false`.
- * @property {string} [kwargs.language] The source language. Default is `null`, meaning it should be auto-detected. Use this to potentially improve performance if the source language is known.
- * @property {string} [kwargs.task] The task to perform. Default is `null`, meaning it should be auto-detected.
- * @property {number[][]} [kwargs.forced_decoder_ids] A list of pairs of integers which indicates a mapping from generation indices to token indices
- * that will be forced before sampling. For example, [[1, 123]] means the second generated token will always be a token of index 123.
+ * @property {boolean|'word'} [return_timestamps] Whether to return timestamps or not. Default is `false`.
+ * @property {number} [chunk_length_s] The length of audio chunks to process in seconds. Default is 0 (no chunking).
+ * @property {number} [stride_length_s] The length of overlap between consecutive audio chunks in seconds. If not provided, defaults to `chunk_length_s / 6`.
+ * @property {boolean} [force_full_sequences] Whether to force outputting full sequences or not. Default is `false`.
+ * @property {string} [language] The source language. Default is `null`, meaning it should be auto-detected. Use this to potentially improve performance if the source language is known.
+ * @property {string} [task] The task to perform. Default is `null`, meaning it should be auto-detected.
  * @property {number} [num_frames] The number of frames in the input audio.
- * @typedef {import('./utils/generation.js').GenerationConfigType & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig
+ * @typedef {import('./generation/configuration_utils.js').GenerationConfig & AutomaticSpeechRecognitionSpecificParams} AutomaticSpeechRecognitionConfig
  * 
  * @callback AutomaticSpeechRecognitionPipelineCallback Transcribe the audio sequence(s) given as inputs to text.
  * @param {AudioPipelineInputs} audio The input audio file(s) to be transcribed. The input is either:
@@ -1583,7 +1634,7 @@ export class ZeroShotAudioClassificationPipeline extends (/** @type {new (option
  * to get the waveform using the [`AudioContext`](https://developer.mozilla.org/en-US/docs/Web/API/AudioContext) API.
  * If `AudioContext` is not available, you should pass the raw waveform in as a Float32Array of shape `(n, )`.
  * - `Float32Array` or `Float64Array` of shape `(n, )`, representing the raw audio at the correct sampling rate (no further check will be done).
- * @param {AutomaticSpeechRecognitionConfig} [options] Additional keyword arguments to pass along to the generate method of the model.
+ * @param {Partial<AutomaticSpeechRecognitionConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<AutomaticSpeechRecognitionOutput|AutomaticSpeechRecognitionOutput[]>} An object containing the transcription text and optionally timestamps if `return_timestamps` is `true`.
  * 
  * @typedef {TextAudioPipelineConstructorArgs & AutomaticSpeechRecognitionPipelineCallback & Disposable} AutomaticSpeechRecognitionPipelineType
@@ -1687,7 +1738,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
      * @type {AutomaticSpeechRecognitionPipelineCallback}
      * @private
      */
-    async _call_wav2vec2(audio, kwargs = {}) {
+    async _call_wav2vec2(audio, kwargs) {
         // TODO use kwargs
 
         if (kwargs.language) {
@@ -1725,30 +1776,17 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
      * @type {AutomaticSpeechRecognitionPipelineCallback}
      * @private
      */
-    async _call_whisper(audio, kwargs = {}) {
-
+    async _call_whisper(audio, kwargs) {
         const return_timestamps = kwargs.return_timestamps ?? false;
         const chunk_length_s = kwargs.chunk_length_s ?? 0;
-        const chunk_callback = kwargs.chunk_callback ?? null;
         const force_full_sequences = kwargs.force_full_sequences ?? false;
         let stride_length_s = kwargs.stride_length_s ?? null;
 
-        if (return_timestamps === 'word') {
-            kwargs['return_token_timestamps'] = true;
-        }
-
-        const language = pop(kwargs, 'language', null);
-        const task = pop(kwargs, 'task', null);
+        const generation_config = { ...kwargs }
 
-        if (language || task || return_timestamps) {
-            if (kwargs.forced_decoder_ids) {
-                throw new Error("Cannot specify `language`/`task`/`return_timestamps` and `forced_decoder_ids` at the same time.")
-            }
-            // @ts-ignore
-            const decoder_prompt_ids = this.tokenizer.get_decoder_prompt_ids({ language, task, no_timestamps: !return_timestamps })
-            if (decoder_prompt_ids.length > 0) {
-                kwargs.forced_decoder_ids = decoder_prompt_ids;
-            }
+        if (return_timestamps === 'word') {
+            generation_config['return_token_timestamps'] = true;
+            generation_config['return_timestamps'] = false; // Do not predict timestamp tokens
         }
 
         const single = !Array.isArray(audio);
@@ -1764,7 +1802,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
 
         const toReturn = [];
         for (const aud of preparedAudios) {
-            /** @type {ChunkCallbackItem[]} */
+            /** @type {{stride: number[], input_features: Tensor, is_last: boolean, tokens?: bigint[], token_timestamps?: number[]}[]} */
             let chunks = [];
             if (chunk_length_s > 0) {
                 if (stride_length_s === null) {
@@ -1781,22 +1819,23 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
                 let offset = 0;
 
                 // Create subarrays of audio with overlaps
-
-                while (offset < aud.length) {
-                    const subarr = aud.subarray(offset, offset + window);
+                while (true) {
+                    const offset_end = offset + window;
+                    const subarr = aud.subarray(offset, offset_end);
                     const feature = await this.processor(subarr);
 
-                    const isFirst = offset === 0;
-                    const isLast = offset + jump >= aud.length;
+                    const is_first = offset === 0;
+                    const is_last = offset_end >= aud.length;
                     chunks.push({
                         stride: [
                             subarr.length,
-                            isFirst ? 0 : stride,
-                            isLast ? 0 : stride
+                            is_first ? 0 : stride,
+                            is_last ? 0 : stride
                         ],
                         input_features: feature.input_features,
-                        is_last: isLast
+                        is_last,
                     })
+                    if (is_last) break;
                     offset += jump;
                 }
 
@@ -1810,28 +1849,27 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
 
             // Generate for each set of input features
             for (const chunk of chunks) {
-                kwargs.num_frames = Math.floor(chunk.stride[0] / hop_length);
+                generation_config.num_frames = Math.floor(chunk.stride[0] / hop_length);
 
                 // NOTE: doing sequentially for now
-                const data = await this.model.generate(chunk.input_features, kwargs);
+                const data = await this.model.generate({
+                    inputs: chunk.input_features,
+                    ...generation_config
+                });
 
                 // TODO: Right now we only get top beam
                 if (return_timestamps === 'word') {
-                    chunk.tokens = data.sequences[0];
+                    chunk.tokens = data.sequences.tolist()[0];
                     chunk.token_timestamps = data.token_timestamps.tolist()[0].map(
                         (/** @type {number} */ x) => round(x, 2)
                     );
 
                 } else {
-                    chunk.tokens = data[0];
+                    chunk.tokens = (/** @type {Tensor} */(data))[0].tolist();
                 }
 
                 // convert stride to seconds
                 chunk.stride = chunk.stride.map(x => x / sampling_rate);
-
-                if (chunk_callback !== null) {
-                    chunk_callback(chunk)
-                }
             }
 
             // Merge text chunks
@@ -1853,7 +1891,7 @@ export class AutomaticSpeechRecognitionPipeline extends (/** @type {new (options
  * 
  * @callback ImageToTextPipelineCallback Assign labels to the image(s) passed as inputs.
  * @param {ImagePipelineInputs} texts The images to be captioned.
- * @param {import('./utils/generation.js').GenerationConfigType} [options] Additional keyword arguments to pass along to the generate method of the model.
+ * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<ImageToTextOutput|ImageToTextOutput[]>} An object (or array of objects) containing the generated text(s).
  * 
  * @typedef {TextImagePipelineConstructorArgs & ImageToTextPipelineCallback & Disposable} ImageToTextPipelineType
@@ -1899,8 +1937,8 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
         const toReturn = [];
         for (const batch of pixel_values) {
             batch.dims = [1, ...batch.dims]
-            const output = await this.model.generate(batch, generate_kwargs);
-            const decoded = this.tokenizer.batch_decode(output, {
+            const output = await this.model.generate({ inputs: batch, ...generate_kwargs });
+            const decoded = this.tokenizer.batch_decode(/** @type {Tensor} */(output), {
                 skip_special_tokens: true,
             }).map(x => ({ generated_text: x.trim() }))
             toReturn.push(decoded);
@@ -1917,7 +1955,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
  * @typedef {ImageClassificationSingle[]} ImageClassificationOutput
  * 
  * @typedef {Object} ImageClassificationPipelineOptions Parameters specific to image classification pipelines.
- * @property {number} [topk=1] The number of top labels that will be returned by the pipeline. 
+ * @property {number} [top_k=1] The number of top labels that will be returned by the pipeline. 
  * 
  * @callback ImageClassificationPipelineCallback Assign labels to the image(s) passed as inputs.
  * @param {ImagePipelineInputs} images The input images(s) to be classified.
@@ -1945,7 +1983,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
  * ```javascript
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
  * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/tiger.jpg';
- * const output = await classifier(url, { topk: 3 });
+ * const output = await classifier(url, { top_k: 3 });
  * // [
  * //   { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
  * //   { label: 'tiger cat', score: 0.3634825646877289 },
@@ -1957,7 +1995,7 @@ export class ImageToTextPipeline extends (/** @type {new (options: TextImagePipe
  * ```javascript
  * const classifier = await pipeline('image-classification', 'Xenova/vit-base-patch16-224');
  * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/tiger.jpg';
- * const output = await classifier(url, { topk: 0 });
+ * const output = await classifier(url, { top_k: 0 });
  * // [
  * //   { label: 'tiger, Panthera tigris', score: 0.632695734500885 },
  * //   { label: 'tiger cat', score: 0.3634825646877289 },
@@ -1979,32 +2017,36 @@ export class ImageClassificationPipeline extends (/** @type {new (options: Image
 
     /** @type {ImageClassificationPipelineCallback} */
     async _call(images, {
-        topk = 1
+        top_k = 5
     } = {}) {
 
-        const isBatched = Array.isArray(images);
         const preparedImages = await prepareImages(images);
 
         const { pixel_values } = await this.processor(preparedImages);
         const output = await this.model({ pixel_values });
 
         const id2label = this.model.config.id2label;
+
+        /** @type {ImageClassificationOutput[]} */
         const toReturn = [];
         for (const batch of output.logits) {
-            const scores = getTopItems(softmax(batch.data), topk);
+            const scores = await topk(new Tensor(
+                'float32',
+                softmax(batch.data),
+                batch.dims,
+            ), top_k);
 
-            const vals = scores.map(x => ({
-                label: id2label[x[0]],
-                score: x[1],
+            const values = scores[0].tolist();
+            const indices = scores[1].tolist();
+
+            const vals = indices.map((x, i) => ({
+                label: /** @type {string} */ (id2label ? id2label[x] : `LABEL_${x}`),
+                score: /** @type {number} */ (values[i]),
             }));
-            if (topk === 1) {
-                toReturn.push(...vals);
-            } else {
-                toReturn.push(vals);
-            }
+            toReturn.push(vals);
         }
 
-        return isBatched || topk === 1 ? /** @type {ImageClassificationOutput} */ (toReturn) : /** @type {ImageClassificationOutput[]} */ (toReturn)[0];
+        return Array.isArray(images) ? toReturn : toReturn[0];
     }
 
 }
@@ -2348,7 +2390,7 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
  * 
  * @typedef {Object} ZeroShotObjectDetectionPipelineOptions Parameters specific to zero-shot object detection pipelines.
  * @property {number} [threshold=0.1] The probability necessary to make a prediction.
- * @property {number} [topk=null] The number of top predictions that will be returned by the pipeline.
+ * @property {number} [top_k=null] The number of top predictions that will be returned by the pipeline.
  * If the provided number is `null` or higher than the number of predictions available, it will default
  * to the number of predictions.
  * @property {boolean} [percentage=false] Whether to return the boxes coordinates in percentage (true) or in pixels (false).
@@ -2401,7 +2443,7 @@ export class ObjectDetectionPipeline extends (/** @type {new (options: ImagePipe
  * const detector = await pipeline('zero-shot-object-detection', 'Xenova/owlvit-base-patch32');
  * const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/beach.png';
  * const candidate_labels = ['hat', 'book', 'sunglasses', 'camera'];
- * const output = await detector(url, candidate_labels, { topk: 4, threshold: 0.05 });
+ * const output = await detector(url, candidate_labels, { top_k: 4, threshold: 0.05 });
  * // [
  * //   {
  * //     score: 0.1606510728597641,
@@ -2439,7 +2481,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
     /** @type {ZeroShotObjectDetectionPipelineCallback} */
     async _call(images, candidate_labels, {
         threshold = 0.1,
-        topk = null,
+        top_k = null,
         percentage = false,
     } = {}) {
 
@@ -2474,8 +2516,8 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
                 label: candidate_labels[processed.classes[i]],
                 box: get_bounding_box(box, !percentage),
             })).sort((a, b) => b.score - a.score);
-            if (topk !== null) {
-                result = result.slice(0, topk);
+            if (top_k !== null) {
+                result = result.slice(0, top_k);
             }
             toReturn.push(result)
         }
@@ -2492,7 +2534,7 @@ export class ZeroShotObjectDetectionPipeline extends (/** @type {new (options: T
  * @callback DocumentQuestionAnsweringPipelineCallback Answer the question given as input by using the document.
  * @param {ImageInput} image The image of the document to use.
  * @param {string} question A question to ask of the document.
- * @param {import('./utils/generation.js').GenerationConfigType} [options] Additional keyword arguments to pass along to the generate method of the model.
+ * @param {Partial<import('./generation/configuration_utils.js').GenerationConfig>} [options] Additional keyword arguments to pass along to the generate method of the model.
  * @returns {Promise<DocumentQuestionAnsweringOutput|DocumentQuestionAnsweringOutput[]>} An object (or array of objects) containing the answer(s).
  * 
  * @typedef {TextImagePipelineConstructorArgs & DocumentQuestionAnsweringPipelineCallback & Disposable} DocumentQuestionAnsweringPipelineType
@@ -2524,6 +2566,7 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
 
     /** @type {DocumentQuestionAnsweringPipelineCallback} */
     async _call(image, question, generate_kwargs = {}) {
+        throw new Error('This pipeline is not yet supported in Transformers.js v3.'); // TODO: Remove when implemented
 
         // NOTE: For now, we only support a batch size of 1
 
@@ -2540,17 +2583,15 @@ export class DocumentQuestionAnsweringPipeline extends (/** @type {new (options:
         }).input_ids;
 
         // Run model
-        const output = await this.model.generate(
-            pixel_values,
-            {
-                ...generate_kwargs,
-                decoder_input_ids,
-                max_length: this.model.config.decoder.max_position_embeddings,
-            }
-        );
+        const output = await this.model.generate({
+            inputs: pixel_values,
+            max_length: this.model.config.decoder.max_position_embeddings,
+            decoder_input_ids,
+            ...generate_kwargs,
+        });
 
         // Decode output
-        const decoded = this.tokenizer.batch_decode(output)[0];
+        const decoded = this.tokenizer.batch_decode(/** @type {Tensor} */(output))[0];
 
         // Parse answer
         const match = decoded.match(/<s_answer>(.*?)<\/s_answer>/);
@@ -2671,7 +2712,7 @@ export class TextToAudioPipeline extends (/** @type {new (options: TextToAudioPi
         // Load vocoder, if not provided
         if (!this.vocoder) {
             console.log('No vocoder specified, using default HifiGan vocoder.');
-            this.vocoder = await AutoModel.from_pretrained(this.DEFAULT_VOCODER_ID, { quantized: false });
+            this.vocoder = await AutoModel.from_pretrained(this.DEFAULT_VOCODER_ID, { dtype: 'fp32' });
         }
 
         // Load speaker embeddings as Float32Array from path/URL
@@ -3005,7 +3046,7 @@ const SUPPORTED_TASKS = Object.freeze({
     "image-segmentation": {
         // no tokenizer
         "pipeline": ImageSegmentationPipeline,
-        "model": [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation],
+        "model": [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation, AutoModelForUniversalSegmentation],
         "processor": AutoProcessor,
         "default": {
             // TODO: replace with original
@@ -3164,7 +3205,7 @@ const TASK_ALIASES = Object.freeze({
  *  - `"zero-shot-image-classification"`: will return a `ZeroShotImageClassificationPipeline`.
  *  - `"zero-shot-object-detection"`: will return a `ZeroShotObjectDetectionPipeline`.
  * @param {string} [model=null] The name of the pre-trained model to use. If not specified, the default model for the task will be used.
- * @param {import('./utils/hub.js').PretrainedOptions} [options] Optional parameters for the pipeline.
+ * @param {import('./utils/hub.js').PretrainedModelOptions} [options] Optional parameters for the pipeline.
  * @returns {Promise<AllTasks[T]>} A Pipeline object for the specified task.
  * @throws {Error} If an unsupported pipeline is requested.
  */
@@ -3172,13 +3213,15 @@ export async function pipeline(
     task,
     model = null,
     {
-        quantized = true,
         progress_callback = null,
         config = null,
         cache_dir = null,
         local_files_only = false,
         revision = 'main',
+        device = null,
+        dtype = null,
         model_file_name = null,
+        session_options = {},
     } = {}
 ) {
     // Helper method to construct pipeline
@@ -3200,13 +3243,15 @@ export async function pipeline(
     }
 
     const pretrainedOptions = {
-        quantized,
         progress_callback,
         config,
         cache_dir,
         local_files_only,
         revision,
+        device,
+        dtype,
         model_file_name,
+        session_options,
     }
 
     const classes = new Map([
@@ -3243,7 +3288,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
 
     /**@type {Promise[]} */
     const promises = [];
-    for (let [name, cls] of mapping.entries()) {
+    for (const [name, cls] of mapping.entries()) {
         if (!cls) continue;
 
         /**@type {Promise} */
@@ -3251,7 +3296,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
         if (Array.isArray(cls)) {
             promise = new Promise(async (resolve, reject) => {
                 let e;
-                for (let c of cls) {
+                for (const c of cls) {
                     if (c === null) {
                         // If null, we resolve it immediately, meaning the relevant
                         // class was not found, but it is optional.
@@ -3262,7 +3307,17 @@ async function loadItems(mapping, model, pretrainedOptions) {
                         resolve(await c.from_pretrained(model, pretrainedOptions));
                         return;
                     } catch (err) {
-                        e = err;
+                        if (err.message?.includes('Unsupported model type')) {
+                            // If the error is due to an unsupported model type, we
+                            // save the error and try the next class.
+                            e = err;
+                        } else if (err.message?.includes('Could not locate file')) {
+                            e = err;
+                        } else {
+                            reject(err);
+                            return;
+                        }
+
                     }
                 }
                 reject(e);
@@ -3279,7 +3334,7 @@ async function loadItems(mapping, model, pretrainedOptions) {
     await Promise.all(promises);
 
     // Then assign to result
-    for (let [name, promise] of Object.entries(result)) {
+    for (const [name, promise] of Object.entries(result)) {
         result[name] = await promise;
     }
 
diff --git a/src/processors.js b/src/processors.js
index 4b9a60b51..e95dc31e9 100644
--- a/src/processors.js
+++ b/src/processors.js
@@ -4,7 +4,7 @@
  * 
  * **Example:** Using a `WhisperProcessor` to prepare an audio input for a model.
  * ```javascript
- * import { AutoProcessor, read_audio } from '@xenova/transformers';
+ * import { AutoProcessor, read_audio } from '@huggingface/transformers';
  *
  * let processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en');
  * let audio = await read_audio('https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac', 16000);
@@ -21,6 +21,9 @@
  */
 import {
     Callable,
+} from './utils/generic.js';
+
+import {
     calculateDimensions,
     calculateReflectOffset,
 } from './utils/core.js';
@@ -37,7 +40,7 @@ import {
 } from './utils/maths.js';
 
 
-import { Tensor, permute, cat, interpolate, stack } from './utils/tensor.js';
+import { Tensor, cat, interpolate, stack, interpolate_4d, full } from './utils/tensor.js';
 
 import { RawImage } from './utils/image.js';
 import {
@@ -70,7 +73,7 @@ function center_to_corners_format([centerX, centerY, width, height]) {
  * @param {Tensor} outputs.logits The logits
  * @param {Tensor} outputs.pred_boxes The predicted boxes.
  * @param {number} [threshold=0.5] The threshold to use for the scores.
- * @param {number[][]} [target_sizes=null] The sizes of the original images.
+ * @param {[number, number][]} [target_sizes=null] The sizes of the original images.
  * @param {boolean} [is_zero_shot=false] Whether zero-shot object detection was performed.
  * @return {Object[]} An array of objects containing the post-processed outputs.
  * @private
@@ -116,10 +119,13 @@ function post_process_object_detection(outputs, threshold = 0.5, target_sizes =
                     // This is the background class, skip it
                     continue;
                 }
-                indices.push(maxIndex);
-
                 // Compute softmax over classes
                 probs = softmax(logit.data);
+
+                if (probs[maxIndex] < threshold) {
+                    continue;
+                }
+                indices.push(maxIndex);
             }
 
             for (const index of indices) {
@@ -144,6 +150,364 @@ function post_process_object_detection(outputs, threshold = 0.5, target_sizes =
     return toReturn;
 }
 
+
+/**
+ * Post-processes the outputs of the model (for semantic segmentation).
+ * @param {*} outputs Raw outputs of the model.
+ * @param {[number, number][]} [target_sizes=null] List of tuples corresponding to the requested final size
+ * (height, width) of each prediction. If unset, predictions will not be resized.
+ * @returns {{segmentation: Tensor; labels: number[]}[]} The semantic segmentation maps.
+ */
+function post_process_semantic_segmentation(outputs, target_sizes = null) {
+
+    const logits = outputs.logits;
+    const batch_size = logits.dims[0];
+
+    if (target_sizes !== null && target_sizes.length !== batch_size) {
+        throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits")
+    }
+
+    const toReturn = [];
+    for (let i = 0; i < batch_size; ++i) {
+        const target_size = target_sizes !== null ? target_sizes[i] : null;
+
+        let data = logits[i];
+
+        // 1. If target_size is not null, we need to resize the masks to the target size
+        if (target_size !== null) {
+            // resize the masks to the target size
+            data = interpolate(data, target_size, 'bilinear', false);
+        }
+        const [height, width] = target_size ?? data.dims.slice(-2);
+
+        const segmentation = new Tensor(
+            'int32',
+            new Int32Array(height * width),
+            [height, width]
+        );
+
+        // Buffer to store current largest value
+        const buffer = data[0].data;
+        const segmentation_data = segmentation.data;
+        for (let j = 1; j < data.dims[0]; ++j) {
+            const row = data[j].data;
+            for (let k = 0; k < row.length; ++k) {
+                if (row[k] > buffer[k]) {
+                    buffer[k] = row[k];
+                    segmentation_data[k] = j;
+                }
+            }
+        }
+
+        // Store which objects have labels
+        // This is much more efficient that creating a set of the final values
+        const hasLabel = new Array(data.dims[0]);
+        for (let j = 0; j < segmentation_data.length; ++j) {
+            const index = segmentation_data[j];
+            hasLabel[index] = index;
+        }
+        /** @type {number[]} The unique list of labels that were detected */
+        const labels = hasLabel.filter(x => x !== undefined);
+
+        toReturn.push({ segmentation, labels });
+    }
+    return toReturn;
+}
+
+
+/**
+ * Binarize the given masks using `object_mask_threshold`, it returns the associated values of `masks`, `scores` and `labels`.
+ * @param {Tensor} class_logits The class logits.
+ * @param {Tensor} mask_logits The mask logits.
+ * @param {number} object_mask_threshold A number between 0 and 1 used to binarize the masks.
+ * @param {number} num_labels The number of labels.
+ * @returns {[Tensor[], number[], number[]]} The binarized masks, the scores, and the labels.
+ * @private
+ */
+function remove_low_and_no_objects(class_logits, mask_logits, object_mask_threshold, num_labels) {
+
+    const mask_probs_item = [];
+    const pred_scores_item = [];
+    const pred_labels_item = [];
+
+    for (let j = 0; j < class_logits.dims[0]; ++j) {
+        const cls = class_logits[j];
+        const mask = mask_logits[j];
+
+        const pred_label = max(cls.data)[1];
+        if (pred_label === num_labels) {
+            // Is the background, so we ignore it
+            continue;
+        }
+
+        const scores = softmax(cls.data);
+        const pred_score = scores[pred_label];
+        if (pred_score > object_mask_threshold) {
+            mask_probs_item.push(mask);
+            pred_scores_item.push(pred_score);
+            pred_labels_item.push(pred_label);
+        }
+    }
+
+    return [mask_probs_item, pred_scores_item, pred_labels_item];
+}
+
+/**
+ * Checks whether the segment is valid or not.
+ * @param {Int32Array} mask_labels Labels for each pixel in the mask.
+ * @param {Tensor[]} mask_probs Probabilities for each pixel in the masks.
+ * @param {number} k The class id of the segment.
+ * @param {number} mask_threshold The mask threshold.
+ * @param {number} overlap_mask_area_threshold The overlap mask area threshold.
+ * @returns {[boolean, number[]]} Whether the segment is valid or not, and the indices of the valid labels.
+ * @private
+ */
+function check_segment_validity(
+    mask_labels,
+    mask_probs,
+    k,
+    mask_threshold = 0.5,
+    overlap_mask_area_threshold = 0.8
+) {
+    // mask_k is a 1D array of indices, indicating where the mask is equal to k
+    const mask_k = [];
+    let mask_k_area = 0;
+    let original_area = 0;
+
+    const mask_probs_k_data = mask_probs[k].data;
+
+    // Compute the area of all the stuff in query k
+    for (let i = 0; i < mask_labels.length; ++i) {
+        if (mask_labels[i] === k) {
+            mask_k.push(i);
+            ++mask_k_area;
+        }
+
+        if (mask_probs_k_data[i] >= mask_threshold) {
+            ++original_area;
+        }
+    }
+    let mask_exists = mask_k_area > 0 && original_area > 0;
+
+    // Eliminate disconnected tiny segments
+    if (mask_exists) {
+        // Perform additional check
+        let area_ratio = mask_k_area / original_area;
+        mask_exists = area_ratio > overlap_mask_area_threshold;
+    }
+
+    return [mask_exists, mask_k]
+}
+
+/**
+ * Computes the segments.
+ * @param {Tensor[]} mask_probs The mask probabilities.
+ * @param {number[]} pred_scores The predicted scores.
+ * @param {number[]} pred_labels The predicted labels.
+ * @param {number} mask_threshold The mask threshold.
+ * @param {number} overlap_mask_area_threshold The overlap mask area threshold.
+ * @param {Set<number>} label_ids_to_fuse The label ids to fuse.
+ * @param {number[]} target_size The target size of the image.
+ * @returns {[Tensor, Array<{id: number, label_id: number, score: number}>]} The computed segments.
+ * @private
+ */
+function compute_segments(
+    mask_probs,
+    pred_scores,
+    pred_labels,
+    mask_threshold,
+    overlap_mask_area_threshold,
+    label_ids_to_fuse = null,
+    target_size = null,
+) {
+    const [height, width] = target_size ?? mask_probs[0].dims;
+
+    const segmentation = new Tensor(
+        'int32',
+        new Int32Array(height * width),
+        [height, width]
+    );
+    const segments = [];
+
+    // 1. If target_size is not null, we need to resize the masks to the target size
+    if (target_size !== null) {
+        // resize the masks to the target size
+        for (let i = 0; i < mask_probs.length; ++i) {
+            mask_probs[i] = interpolate(mask_probs[i], target_size, 'bilinear', false);
+        }
+    }
+
+    // 2. Weigh each mask by its prediction score
+    // NOTE: `mask_probs` is updated in-place
+    // 
+    // Temporary storage for the best label/scores for each pixel ([height, width]):
+    const mask_labels = new Int32Array(mask_probs[0].data.length);
+    const bestScores = new Float32Array(mask_probs[0].data.length);
+
+    for (let i = 0; i < mask_probs.length; ++i) {
+        let score = pred_scores[i];
+
+        const mask_probs_i_data = mask_probs[i].data;
+
+        for (let j = 0; j < mask_probs_i_data.length; ++j) {
+            mask_probs_i_data[j] *= score
+            if (mask_probs_i_data[j] > bestScores[j]) {
+                mask_labels[j] = i;
+                bestScores[j] = mask_probs_i_data[j];
+            }
+        }
+    }
+
+    let current_segment_id = 0;
+
+    // let stuff_memory_list = {}
+    const segmentation_data = segmentation.data;
+    for (let k = 0; k < pred_labels.length; ++k) {
+        const pred_class = pred_labels[k];
+
+        // TODO add `should_fuse`
+        // let should_fuse = pred_class in label_ids_to_fuse
+
+        // Check if mask exists and large enough to be a segment
+        const [mask_exists, mask_k] = check_segment_validity(
+            mask_labels,
+            mask_probs,
+            k,
+            mask_threshold,
+            overlap_mask_area_threshold
+        )
+
+        if (!mask_exists) {
+            // Nothing to see here
+            continue;
+        }
+
+        // TODO
+        // if (pred_class in stuff_memory_list) {
+        //     current_segment_id = stuff_memory_list[pred_class]
+        // } else {
+        //     current_segment_id += 1;
+        // }
+        ++current_segment_id;
+
+
+        // Add current object segment to final segmentation map
+        for (const index of mask_k) {
+            segmentation_data[index] = current_segment_id;
+        }
+
+        segments.push({
+            id: current_segment_id,
+            label_id: pred_class,
+            // was_fused: should_fuse, TODO
+            score: pred_scores[k],
+        })
+
+        // TODO
+        // if(should_fuse){
+        //     stuff_memory_list[pred_class] = current_segment_id
+        // }
+    }
+
+    return [segmentation, segments];
+}
+
+
+/**
+ * Post-process the model output to generate the final panoptic segmentation.
+ * @param {*} outputs The model output to post process
+ * @param {number} [threshold=0.5] The probability score threshold to keep predicted instance masks.
+ * @param {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
+ * @param {number} [overlap_mask_area_threshold=0.8] The overlap mask area threshold to merge or discard small disconnected parts within each binary instance mask.
+ * @param {Set<number>} [label_ids_to_fuse=null] The labels in this state will have all their instances be fused together.
+ * @param {[number, number][]} [target_sizes=null] The target sizes to resize the masks to.
+ * @returns {Array<{ segmentation: Tensor, segments_info: Array<{id: number, label_id: number, score: number}>}>}
+ */
+function post_process_panoptic_segmentation(
+    outputs,
+    threshold = 0.5,
+    mask_threshold = 0.5,
+    overlap_mask_area_threshold = 0.8,
+    label_ids_to_fuse = null,
+    target_sizes = null,
+) {
+    if (label_ids_to_fuse === null) {
+        console.warn("`label_ids_to_fuse` unset. No instance will be fused.")
+        label_ids_to_fuse = new Set();
+    }
+
+    const class_queries_logits = outputs.class_queries_logits ?? outputs.logits; // [batch_size, num_queries, num_classes+1]
+    const masks_queries_logits = outputs.masks_queries_logits ?? outputs.pred_masks; // [batch_size, num_queries, height, width]
+
+    const mask_probs = masks_queries_logits.sigmoid()  // [batch_size, num_queries, height, width]
+
+    let [batch_size, num_queries, num_labels] = class_queries_logits.dims;
+    num_labels -= 1; // Remove last class (background)
+
+    if (target_sizes !== null && target_sizes.length !== batch_size) {
+        throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits")
+    }
+
+    let toReturn = [];
+    for (let i = 0; i < batch_size; ++i) {
+        let target_size = target_sizes !== null ? target_sizes[i] : null;
+
+        let class_logits = class_queries_logits[i];
+        let mask_logits = mask_probs[i];
+
+        let [mask_probs_item, pred_scores_item, pred_labels_item] = remove_low_and_no_objects(class_logits, mask_logits, threshold, num_labels);
+
+        if (pred_labels_item.length === 0) {
+            // No mask found
+            let [height, width] = target_size ?? mask_logits.dims.slice(-2);
+
+            let segmentation = new Tensor(
+                'int32',
+                new Int32Array(height * width).fill(-1),
+                [height, width]
+            )
+            toReturn.push({
+                segmentation: segmentation,
+                segments_info: []
+            });
+            continue;
+        }
+
+
+        // Get segmentation map and segment information of batch item
+        let [segmentation, segments] = compute_segments(
+            mask_probs_item,
+            pred_scores_item,
+            pred_labels_item,
+            mask_threshold,
+            overlap_mask_area_threshold,
+            label_ids_to_fuse,
+            target_size,
+        )
+
+        toReturn.push({
+            segmentation: segmentation,
+            segments_info: segments
+        })
+    }
+
+    return toReturn;
+}
+
+
+/**
+ * Post-processes the outputs of the model (for instance segmentation).
+ * @param {*} outputs Raw outputs of the model.
+ * @param {number} [threshold=0.5] The probability score threshold to keep predicted instance masks.
+ * @param {[number, number][]} [target_sizes=null] List of tuples corresponding to the requested final size
+ * (height, width) of each prediction. If unset, predictions will not be resized.
+ * @returns {Array<{ segmentation: Tensor, segments_info: Array<{id: number, label_id: number, score: number}>}>}
+ */
+function post_process_instance_segmentation(outputs, threshold = 0.5, target_sizes = null) {
+    throw new Error('Not implemented yet');
+    return [];
+}
+
 /**
  * Named tuple to indicate the order we are using is (height x width), even though
  * the Graphics’ industry standard is (width x height).
@@ -334,10 +698,11 @@ export class ImageFeatureExtractor extends FeatureExtractor {
         const threshold = gray_threshold / 255;
 
         let x_min = gray_image.width, y_min = gray_image.height, x_max = 0, y_max = 0;
+        const gray_image_data = gray_image.data;
         for (let j = 0; j < gray_image.height; ++j) {
             const row = j * gray_image.width;
             for (let i = 0; i < gray_image.width; ++i) {
-                if ((gray_image.data[row + i] - minValue) / diff < threshold) {
+                if ((gray_image_data[row + i] - minValue) / diff < threshold) {
                     // We have a non-zero pixel, so we update the min/max values accordingly
                     x_min = Math.min(x_min, i);
                     y_min = Math.min(y_min, j);
@@ -684,7 +1049,7 @@ export class ImageFeatureExtractor extends FeatureExtractor {
         return {
             original_size: [srcHeight, srcWidth],
             reshaped_input_size: reshaped_input_size,
-            pixel_values: pixel_values,
+            pixel_values,
         }
     }
 
@@ -707,7 +1072,7 @@ export class ImageFeatureExtractor extends FeatureExtractor {
         const pixel_values = stack(imageData.map(x => x.pixel_values), 0);
 
         return {
-            pixel_values: pixel_values,
+            pixel_values,
 
             // Original sizes of images
             original_sizes: imageData.map(x => x.original_size),
@@ -719,76 +1084,25 @@ export class ImageFeatureExtractor extends FeatureExtractor {
 
 }
 
+export class SapiensFeatureExtractor extends ImageFeatureExtractor {
+    /** @type {typeof post_process_semantic_segmentation} */
+    post_process_semantic_segmentation(...args) {
+        return post_process_semantic_segmentation(...args);
+    }
+}
 export class SegformerFeatureExtractor extends ImageFeatureExtractor {
-
-    /**
-     * Converts the output of `SegformerForSemanticSegmentation` into semantic segmentation maps.
-     * @param {*} outputs Raw outputs of the model.
-     * @param {number[][]} [target_sizes=null] List of tuples corresponding to the requested final size
-     * (height, width) of each prediction. If unset, predictions will not be resized.
-     * @returns {{segmentation: Tensor; labels: number[]}[]} The semantic segmentation maps.
-     */
-    post_process_semantic_segmentation(outputs, target_sizes = null) {
-
-        const logits = outputs.logits;
-        const batch_size = logits.dims[0];
-
-        if (target_sizes !== null && target_sizes.length !== batch_size) {
-            throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits")
-        }
-
-        const toReturn = [];
-        for (let i = 0; i < batch_size; ++i) {
-            const target_size = target_sizes !== null ? target_sizes[i] : null;
-
-            let data = logits[i];
-
-            // 1. If target_size is not null, we need to resize the masks to the target size
-            if (target_size !== null) {
-                // resize the masks to the target size
-                data = interpolate(data, target_size, 'bilinear', false);
-            }
-            const [height, width] = target_size ?? data.dims.slice(-2);
-
-            const segmentation = new Tensor(
-                'int32',
-                new Int32Array(height * width),
-                [height, width]
-            );
-
-            // Buffer to store current largest value
-            const buffer = data[0].data;
-            for (let j = 1; j < data.dims[0]; ++j) {
-                const row = data[j].data;
-                for (let k = 0; k < row.length; ++k) {
-                    if (row[k] > buffer[k]) {
-                        buffer[k] = row[k];
-                        segmentation.data[k] = j;
-                    }
-                }
-            }
-
-            // Store which objects have labels
-            // This is much more efficient that creating a set of the final values
-            const hasLabel = new Array(data.dims[0]);
-            const out = segmentation.data;
-            for (let j = 0; j < out.length; ++j) {
-                const index = out[j];
-                hasLabel[index] = index;
-            }
-            /** @type {number[]} The unique list of labels that were detected */
-            const labels = hasLabel.filter(x => x !== undefined);
-
-            toReturn.push({ segmentation, labels });
-        }
-        return toReturn;
+    /** @type {typeof post_process_semantic_segmentation} */
+    post_process_semantic_segmentation(...args) {
+        return post_process_semantic_segmentation(...args);
     }
 }
+export class PvtImageProcessor extends ImageFeatureExtractor { }
 export class DPTFeatureExtractor extends ImageFeatureExtractor { }
 export class DPTImageProcessor extends DPTFeatureExtractor { } // NOTE: extends DPTFeatureExtractor
 export class BitImageProcessor extends ImageFeatureExtractor { }
 export class GLPNFeatureExtractor extends ImageFeatureExtractor { }
 export class CLIPFeatureExtractor extends ImageFeatureExtractor { }
+export class CLIPImageProcessor extends CLIPFeatureExtractor { } // NOTE: extends CLIPFeatureExtractor
 export class ChineseCLIPFeatureExtractor extends ImageFeatureExtractor { }
 export class SiglipImageProcessor extends ImageFeatureExtractor { }
 export class ConvNextFeatureExtractor extends ImageFeatureExtractor {
@@ -845,17 +1159,28 @@ export class EfficientNetImageProcessor extends ImageFeatureExtractor {
     }
 }
 
+export class MobileNetV1FeatureExtractor extends ImageFeatureExtractor { }
+export class MobileNetV2FeatureExtractor extends ImageFeatureExtractor { }
+export class MobileNetV3FeatureExtractor extends ImageFeatureExtractor { }
+export class MobileNetV4FeatureExtractor extends ImageFeatureExtractor { }
 
 export class MobileViTFeatureExtractor extends ImageFeatureExtractor { }
 export class MobileViTImageProcessor extends MobileViTFeatureExtractor { } // NOTE extends MobileViTFeatureExtractor
 export class OwlViTFeatureExtractor extends ImageFeatureExtractor {
-    /** @type {post_process_object_detection} */
+    /** @type {typeof post_process_object_detection} */
     post_process_object_detection(...args) {
         return post_process_object_detection(...args);
     }
 }
 export class Owlv2ImageProcessor extends OwlViTFeatureExtractor { } // NOTE extends OwlViTFeatureExtractor
 
+export class RTDetrImageProcessor extends ImageFeatureExtractor {
+    /** @type {typeof post_process_object_detection} */
+    post_process_object_detection(...args) {
+        return post_process_object_detection(...args);
+    }
+}
+
 export class DeiTFeatureExtractor extends ImageFeatureExtractor { }
 export class BeitFeatureExtractor extends ImageFeatureExtractor { }
 export class DonutFeatureExtractor extends ImageFeatureExtractor {
@@ -911,297 +1236,32 @@ export class DetrFeatureExtractor extends ImageFeatureExtractor {
         // TODO support different mask sizes (not just 64x64)
         // Currently, just fill pixel mask with 1s
         const maskSize = [result.pixel_values.dims[0], 64, 64];
-        const pixel_mask = new Tensor(
-            'int64',
-            new BigInt64Array(maskSize.reduce((a, b) => a * b)).fill(1n),
-            maskSize
-        );
+        const pixel_mask = full(maskSize, 1n);
 
         return { ...result, pixel_mask };
     }
 
-    /**
-     * Post-processes the outputs of the model (for object detection).
-     * @param {Object} outputs The outputs of the model that must be post-processed
-     * @param {Tensor} outputs.logits The logits
-     * @param {Tensor} outputs.pred_boxes The predicted boxes.
-     * @return {Object[]} An array of objects containing the post-processed outputs.
-     */
-
-    /** @type {post_process_object_detection} */
+    /** @type {typeof post_process_object_detection} */
     post_process_object_detection(...args) {
         return post_process_object_detection(...args);
     }
 
-    /**
-     * Binarize the given masks using `object_mask_threshold`, it returns the associated values of `masks`, `scores` and `labels`.
-     * @param {Tensor} class_logits The class logits.
-     * @param {Tensor} mask_logits The mask logits.
-     * @param {number} object_mask_threshold A number between 0 and 1 used to binarize the masks.
-     * @param {number} num_labels The number of labels.
-     * @returns {[Tensor[], number[], number[]]} The binarized masks, the scores, and the labels.
-     */
-    remove_low_and_no_objects(class_logits, mask_logits, object_mask_threshold, num_labels) {
-
-        let mask_probs_item = [];
-        let pred_scores_item = [];
-        let pred_labels_item = [];
-
-        for (let j = 0; j < class_logits.dims[0]; ++j) {
-            let cls = class_logits[j];
-            let mask = mask_logits[j];
-
-            let pred_label = max(cls.data)[1];
-            if (pred_label === num_labels) {
-                // Is the background, so we ignore it
-                continue;
-            }
-
-            let scores = softmax(cls.data);
-            let pred_score = scores[pred_label];
-            if (pred_score > object_mask_threshold) {
-                mask_probs_item.push(mask);
-                pred_scores_item.push(pred_score);
-                pred_labels_item.push(pred_label);
-            }
-        }
-
-        return [mask_probs_item, pred_scores_item, pred_labels_item];
-
-    }
-
-    /**
-     * Checks whether the segment is valid or not.
-     * @param {Int32Array} mask_labels Labels for each pixel in the mask.
-     * @param {Tensor[]} mask_probs Probabilities for each pixel in the masks.
-     * @param {number} k The class id of the segment.
-     * @param {number} mask_threshold The mask threshold.
-     * @param {number} overlap_mask_area_threshold The overlap mask area threshold.
-     * @returns {[boolean, number[]]} Whether the segment is valid or not, and the indices of the valid labels.
-     */
-    check_segment_validity(
-        mask_labels,
-        mask_probs,
-        k,
-        mask_threshold = 0.5,
-        overlap_mask_area_threshold = 0.8
-    ) {
-        // mask_k is a 1D array of indices, indicating where the mask is equal to k
-        let mask_k = [];
-        let mask_k_area = 0;
-        let original_area = 0;
-
-        // Compute the area of all the stuff in query k
-        for (let i = 0; i < mask_labels.length; ++i) {
-            if (mask_labels[i] === k) {
-                mask_k.push(i);
-                ++mask_k_area;
-            }
-
-            if (mask_probs[k].data[i] >= mask_threshold) {
-                ++original_area;
-            }
-        }
-        let mask_exists = mask_k_area > 0 && original_area > 0;
-
-        // Eliminate disconnected tiny segments
-        if (mask_exists) {
-            // Perform additional check
-            let area_ratio = mask_k_area / original_area;
-            mask_exists = area_ratio > overlap_mask_area_threshold;
-        }
-
-        return [mask_exists, mask_k]
+    /** @type {typeof post_process_panoptic_segmentation} */
+    post_process_panoptic_segmentation(...args) {
+        return post_process_panoptic_segmentation(...args);
     }
 
-    /**
-     * Computes the segments.
-     * @param {Tensor[]} mask_probs The mask probabilities.
-     * @param {number[]} pred_scores The predicted scores.
-     * @param {number[]} pred_labels The predicted labels.
-     * @param {number} mask_threshold The mask threshold.
-     * @param {number} overlap_mask_area_threshold The overlap mask area threshold.
-     * @param {Set<number>} label_ids_to_fuse The label ids to fuse.
-     * @param {number[]} target_size The target size of the image.
-     * @returns {[Tensor, Array<{id: number, label_id: number, score: number}>]} The computed segments.
-     */
-    compute_segments(
-        mask_probs,
-        pred_scores,
-        pred_labels,
-        mask_threshold,
-        overlap_mask_area_threshold,
-        label_ids_to_fuse = null,
-        target_size = null,
-    ) {
-        let [height, width] = target_size ?? mask_probs[0].dims;
-
-        let segmentation = new Tensor(
-            'int32',
-            new Int32Array(height * width),
-            [height, width]
-        );
-        let segments = [];
-
-        // 1. If target_size is not null, we need to resize the masks to the target size
-        if (target_size !== null) {
-            // resize the masks to the target size
-            for (let i = 0; i < mask_probs.length; ++i) {
-                mask_probs[i] = interpolate(mask_probs[i], target_size, 'bilinear', false);
-            }
-        }
-
-        // 2. Weigh each mask by its prediction score
-        // NOTE: `mask_probs` is updated in-place
-        // 
-        // Temporary storage for the best label/scores for each pixel ([height, width]):
-        let mask_labels = new Int32Array(mask_probs[0].data.length);
-        let bestScores = new Float32Array(mask_probs[0].data.length);
-
-        for (let i = 0; i < mask_probs.length; ++i) {
-            let score = pred_scores[i];
-
-            for (let j = 0; j < mask_probs[i].data.length; ++j) {
-                mask_probs[i].data[j] *= score
-                if (mask_probs[i].data[j] > bestScores[j]) {
-                    mask_labels[j] = i;
-                    bestScores[j] = mask_probs[i].data[j];
-                }
-            }
-        }
-
-        let current_segment_id = 0;
-
-        // let stuff_memory_list = {}
-        for (let k = 0; k < pred_labels.length; ++k) {
-            let pred_class = pred_labels[k];
-
-            // TODO add `should_fuse`
-            // let should_fuse = pred_class in label_ids_to_fuse
-
-            // Check if mask exists and large enough to be a segment
-            let [mask_exists, mask_k] = this.check_segment_validity(
-                mask_labels,
-                mask_probs,
-                k,
-                mask_threshold,
-                overlap_mask_area_threshold
-            )
-
-            if (!mask_exists) {
-                // Nothing to see here
-                continue;
-            }
-
-            // TODO
-            // if (pred_class in stuff_memory_list) {
-            //     current_segment_id = stuff_memory_list[pred_class]
-            // } else {
-            //     current_segment_id += 1;
-            // }
-            ++current_segment_id;
-
-
-            // Add current object segment to final segmentation map
-            for (let index of mask_k) {
-                segmentation.data[index] = current_segment_id;
-            }
-
-            segments.push({
-                id: current_segment_id,
-                label_id: pred_class,
-                // was_fused: should_fuse, TODO
-                score: pred_scores[k],
-            })
-
-            // TODO
-            // if(should_fuse){
-            //     stuff_memory_list[pred_class] = current_segment_id
-            // }
-        }
-
-        return [segmentation, segments];
+    post_process_instance_segmentation() {
+        // TODO
+        throw Error("Not implemented yet");
     }
+}
 
-    /**
-     * Post-process the model output to generate the final panoptic segmentation.
-     * @param {*} outputs The model output to post process
-     * @param {number} [threshold=0.5] The probability score threshold to keep predicted instance masks.
-     * @param {number} [mask_threshold=0.5] Threshold to use when turning the predicted masks into binary values.
-     * @param {number} [overlap_mask_area_threshold=0.8] The overlap mask area threshold to merge or discard small disconnected parts within each binary instance mask.
-     * @param {Set<number>} [label_ids_to_fuse=null] The labels in this state will have all their instances be fused together.
-     * @param {number[][]} [target_sizes=null] The target sizes to resize the masks to.
-     * @returns {Array<{ segmentation: Tensor, segments_info: Array<{id: number, label_id: number, score: number}>}>}
-     */
-    post_process_panoptic_segmentation(
-        outputs,
-        threshold = 0.5,
-        mask_threshold = 0.5,
-        overlap_mask_area_threshold = 0.8,
-        label_ids_to_fuse = null,
-        target_sizes = null,
-    ) {
-        if (label_ids_to_fuse === null) {
-            console.warn("`label_ids_to_fuse` unset. No instance will be fused.")
-            label_ids_to_fuse = new Set();
-        }
-
-        const class_queries_logits = outputs.logits; // [batch_size, num_queries, num_classes+1]
-        const masks_queries_logits = outputs.pred_masks; // [batch_size, num_queries, height, width]
-
-        const mask_probs = masks_queries_logits.sigmoid()  // [batch_size, num_queries, height, width]
-
-        let [batch_size, num_queries, num_labels] = class_queries_logits.dims;
-        num_labels -= 1; // Remove last class (background)
-
-        if (target_sizes !== null && target_sizes.length !== batch_size) {
-            throw Error("Make sure that you pass in as many target sizes as the batch dimension of the logits")
-        }
-
-        let toReturn = [];
-        for (let i = 0; i < batch_size; ++i) {
-            let target_size = target_sizes !== null ? target_sizes[i] : null;
-
-            let class_logits = class_queries_logits[i];
-            let mask_logits = mask_probs[i];
-
-            let [mask_probs_item, pred_scores_item, pred_labels_item] = this.remove_low_and_no_objects(class_logits, mask_logits, threshold, num_labels);
-
-            if (pred_labels_item.length === 0) {
-                // No mask found
-                let [height, width] = target_size ?? mask_logits.dims.slice(-2);
-
-                let segmentation = new Tensor(
-                    'int32',
-                    new Int32Array(height * width).fill(-1),
-                    [height, width]
-                )
-                toReturn.push({
-                    segmentation: segmentation,
-                    segments_info: []
-                });
-                continue;
-            }
-
-
-            // Get segmentation map and segment information of batch item
-            let [segmentation, segments] = this.compute_segments(
-                mask_probs_item,
-                pred_scores_item,
-                pred_labels_item,
-                mask_threshold,
-                overlap_mask_area_threshold,
-                label_ids_to_fuse,
-                target_size,
-            )
-
-            toReturn.push({
-                segmentation: segmentation,
-                segments_info: segments
-            })
-        }
+export class MaskFormerFeatureExtractor extends ImageFeatureExtractor {
 
-        return toReturn;
+    /** @type {typeof post_process_panoptic_segmentation} */
+    post_process_panoptic_segmentation(...args) {
+        return post_process_panoptic_segmentation(...args);
     }
 
     post_process_instance_segmentation() {
@@ -1210,8 +1270,9 @@ export class DetrFeatureExtractor extends ImageFeatureExtractor {
     }
 }
 
+
 export class YolosFeatureExtractor extends ImageFeatureExtractor {
-    /** @type {post_process_object_detection} */
+    /** @type {typeof post_process_object_detection} */
     post_process_object_detection(...args) {
         return post_process_object_detection(...args);
     }
@@ -1224,6 +1285,7 @@ export class YolosFeatureExtractor extends ImageFeatureExtractor {
  * @property {HeightWidth[]} reshaped_input_sizes
  * @property {Tensor} [input_points]
  * @property {Tensor} [input_labels]
+ * @property {Tensor} [input_boxes]
  */
 
 export class SamImageProcessor extends ImageFeatureExtractor {
@@ -1235,7 +1297,7 @@ export class SamImageProcessor extends ImageFeatureExtractor {
      * @param {HeightWidth[]} reshaped_input_sizes 
      * @returns {Tensor}
      */
-    reshape_input_points(input_points, original_sizes, reshaped_input_sizes) {
+    reshape_input_points(input_points, original_sizes, reshaped_input_sizes, is_bounding_box = false) {
 
         // Make deep copy to avoid altering user's input
         input_points = structuredClone(input_points);
@@ -1244,7 +1306,9 @@ export class SamImageProcessor extends ImageFeatureExtractor {
         // TODO: add support for 2D input_points
         if (shape.length === 3) {
             // Correct user's input
-            shape = [1, ...shape];
+            if (!is_bounding_box) {
+                shape = [1, ...shape];
+            }
             input_points = [input_points];
         } else if (shape.length !== 4) {
             throw Error("The input_points must be a 4D tensor of shape `batch_size`, `point_batch_size`, `nb_points_per_image`, `2`.")
@@ -1262,8 +1326,8 @@ export class SamImageProcessor extends ImageFeatureExtractor {
 
             for (let j = 0; j < input_points[i].length; ++j) { // point_batch_size
                 for (let k = 0; k < input_points[i][j].length; ++k) { // nb_points_per_image
-                    for (let w = 0; w < input_points[i][j][k].length; ++w) { // 2
-                        input_points[i][j][k][w] *= resizeFactors[w];
+                    for (let w = 0; w < input_points[i][j][k].length; ++w) { // 2 or 4
+                        input_points[i][j][k][w] *= resizeFactors[w % 2];
                     }
                 }
             }
@@ -1304,15 +1368,29 @@ export class SamImageProcessor extends ImageFeatureExtractor {
     }
     /**
      * @param {any[]} images The URL(s) of the image(s) to extract features from.
-     * @param {any} [input_points] A 3D or 4D array, representing the input points provided by the user.
+     * @param {Object} [options] Additional options for the processor.
+     * @param {any} [options.input_points=null] A 3D or 4D array, representing the input points provided by the user.
      * - 3D: `[point_batch_size, nb_points_per_image, 2]`. In this case, `batch_size` is assumed to be 1.
      * - 4D: `[batch_size, point_batch_size, nb_points_per_image, 2]`.
-     * @param {any} [input_labels] A 2D or 3D array, representing the input labels for the points, used by the prompt encoder to encode the prompt.
+     * @param {any} [options.input_labels=null] A 2D or 3D array, representing the input labels for the points, used by the prompt encoder to encode the prompt.
      * - 2D: `[point_batch_size, nb_points_per_image]`. In this case, `batch_size` is assumed to be 1.
      * - 3D: `[batch_size, point_batch_size, nb_points_per_image]`.
+     * @param {number[][][]} [options.input_boxes=null] A 3D array of shape `(batch_size, num_boxes, 4)`, representing the input boxes provided by the user.
+     * This is used by the prompt encoder to encode the prompt. Generally yields to much better generated masks.
+     * The processor will generate a tensor, with each dimension corresponding respectively to the image batch size,
+     * the number of boxes per image and the coordinates of the top left and botton right point of the box.
+     * In the order (`x1`, `y1`, `x2`, `y2`):
+     * - `x1`: the x coordinate of the top left point of the input box
+     * - `y1`: the y coordinate of the top left point of the input box
+     * - `x2`: the x coordinate of the bottom right point of the input box
+     * - `y2`: the y coordinate of the bottom right point of the input box
      * @returns {Promise<SamImageProcessorResult>}
      */
-    async _call(images, input_points = null, input_labels = null) {
+    async _call(images, {
+        input_points = null,
+        input_labels = null,
+        input_boxes = null
+    } = {}) {
         // TODO allow user to use preprocessed images
         /** @type {SamImageProcessorResult} */
         const processed = await super._call(images);
@@ -1330,23 +1408,29 @@ export class SamImageProcessor extends ImageFeatureExtractor {
             processed.input_labels = this.add_input_labels(input_labels, processed.input_points);
         }
 
+        if (input_boxes) {
+            processed.input_boxes = this.reshape_input_points(
+                input_boxes, processed.original_sizes, processed.reshaped_input_sizes, true,
+            );
+        }
+
         return processed;
     }
 
     /**
      * Remove padding and upscale masks to the original image size.
      * @param {Tensor} masks Batched masks from the mask_decoder in (batch_size, num_channels, height, width) format.
-     * @param {number[][]} original_sizes The original sizes of each image before it was resized to the model's expected input shape, in (height, width) format.
-     * @param {number[][]} reshaped_input_sizes The size of each image as it is fed to the model, in (height, width) format. Used to remove padding.
+     * @param {[number, number][]} original_sizes The original sizes of each image before it was resized to the model's expected input shape, in (height, width) format.
+     * @param {[number, number][]} reshaped_input_sizes The size of each image as it is fed to the model, in (height, width) format. Used to remove padding.
      * @param {Object} options Optional parameters for post-processing.
      * @param {number} [options.mask_threshold] The threshold to use for binarizing the masks.
      * @param {boolean} [options.binarize] Whether to binarize the masks.
      * @param {Object} [options.pad_size] The target size the images were padded to before being passed to the model. If `null`, the target size is assumed to be the processor's `pad_size`.
      * @param {number} [options.pad_size.height] The height the images were padded to.
      * @param {number} [options.pad_size.width] The width the images were padded to.
-     * @returns {Tensor[]} Batched masks in batch_size, num_channels, height, width) format, where (height, width) is given by original_size.
+     * @returns {Promise<Tensor[]>} Batched masks in batch_size, num_channels, height, width) format, where (height, width) is given by original_size.
      */
-    post_process_masks(masks, original_sizes, reshaped_input_sizes, {
+    async post_process_masks(masks, original_sizes, reshaped_input_sizes, {
         mask_threshold = 0.0,
         binarize = true,
         pad_size = null,
@@ -1357,50 +1441,72 @@ export class SamImageProcessor extends ImageFeatureExtractor {
 
         pad_size = pad_size ?? this.pad_size;
 
+        /** @type {[number, number]} */
         const target_image_size = [pad_size.height, pad_size.width];
 
         for (let i = 0; i < original_sizes.length; ++i) {
             const original_size = original_sizes[i];
             const reshaped_input_size = reshaped_input_sizes[i];
 
-            const mask = masks[i]; // [b, c, h, w]
-
-            // TODO: improve
-            const interpolated_masks = [];
-            for (let j = 0; j < mask.dims[0]; ++j) {
-                const m = mask[j]; // 3d tensor
-
-                // Upscale mask to padded size
-                let interpolated_mask = interpolate(m, target_image_size, 'bilinear', false);
-
-                // Crop mask
-                interpolated_mask = interpolated_mask.slice(null, [0, reshaped_input_size[0]], [0, reshaped_input_size[1]]);
-
-                // Downscale mask
-                interpolated_mask = interpolate(interpolated_mask, original_size, 'bilinear', false);
-
-                if (binarize) {
-                    const binarizedMaskData = new Uint8Array(interpolated_mask.data.length);
-                    for (let i = 0; i < interpolated_mask.data.length; ++i) {
-                        if (interpolated_mask.data[i] > mask_threshold) {
-                            binarizedMaskData[i] = 1;
-                        }
+            // Upscale mask to padded size
+            let interpolated_mask = (await interpolate_4d(
+                masks[i],
+                { mode: 'bilinear', size: target_image_size }
+            ));
+
+            // Crop mask
+            interpolated_mask = interpolated_mask.slice(null, null, [0, reshaped_input_size[0]], [0, reshaped_input_size[1]]);
+
+            // Downscale mask
+            interpolated_mask = (await interpolate_4d(
+                interpolated_mask,
+                { mode: 'bilinear', size: original_size }
+            ));
+
+            if (binarize) {
+                const data = interpolated_mask.data;
+                const binarizedMaskData = new Uint8Array(data.length);
+                for (let i = 0; i < data.length; ++i) {
+                    if (data[i] > mask_threshold) {
+                        binarizedMaskData[i] = 1;
                     }
-                    interpolated_mask = new Tensor(
-                        'bool',
-                        binarizedMaskData,
-                        interpolated_mask.dims
-                    )
                 }
-
-                interpolated_masks.push(interpolated_mask);
+                interpolated_mask = new Tensor(
+                    'bool',
+                    binarizedMaskData,
+                    interpolated_mask.dims
+                )
             }
 
-            output_masks.push(stack(interpolated_masks));
+            output_masks.push(interpolated_mask);
         }
 
         return output_masks;
     }
+
+    /**
+     * Generates a list of crop boxes of different sizes. Each layer has (2**i)**2 boxes for the ith layer.
+     * @param {RawImage} image Input original image
+     * @param {number} target_size Target size of the resized image
+     * @param {Object} options Options for generating crop boxes 
+     * @param {number} [options.crop_n_layers] If >0, mask prediction will be run again on crops of the image.
+     * Sets the number of layers to run, where each layer has 2**i_layer number of image crops.
+     * @param {number} [options.overlap_ratio] Sets the degree to which crops overlap. In the first crop layer,
+     * crops will overlap by this fraction of the image length. Later layers with more crops scale down this overlap.
+     * @param {number} [options.points_per_crop] Number of points to sample from each crop.
+     * @param {number} [options.crop_n_points_downscale_factor] The number of points-per-side sampled in layer n is
+     * scaled down by crop_n_points_downscale_factor**n.
+     * @returns {Object} An object containing the crop boxes, number of points per crop, cropped images, and input labels.
+     */
+    generate_crop_boxes(image, target_size, {
+        crop_n_layers = 0,
+        overlap_ratio = 512 / 1500,
+        points_per_crop = 32,
+        crop_n_points_downscale_factor = 1,
+    } = {}) {
+        // TODO: Implement
+        // return { crop_boxes, points_per_crop, cropped_images, input_labels }
+    }
 }
 
 export class Swin2SRImageProcessor extends ImageFeatureExtractor {
@@ -1455,7 +1561,7 @@ export class VitMatteImageProcessor extends ImageFeatureExtractor {
         ), 0);
 
         return {
-            pixel_values: pixel_values,
+            pixel_values,
 
             // Original sizes of images
             original_sizes: imageData.map(x => x.original_size),
@@ -1488,10 +1594,10 @@ export class WhisperFeatureExtractor extends FeatureExtractor {
     /**
      * Computes the log-Mel spectrogram of the provided audio waveform.
      * @param {Float32Array|Float64Array} waveform The audio waveform to process.
-     * @returns {{data: Float32Array, dims: number[]}} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
+     * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
      */
-    _extract_fbank_features(waveform) {
-        const { data, dims } = spectrogram(
+    async _extract_fbank_features(waveform) {
+        const features = await spectrogram(
             waveform,
             this.window, // window
             this.config.n_fft, // frame_length
@@ -1506,13 +1612,14 @@ export class WhisperFeatureExtractor extends FeatureExtractor {
             }
         )
 
+        const data = features.data;
         const maxValue = max(data)[0];
 
         for (let i = 0; i < data.length; ++i) {
             data[i] = (Math.max(data[i], maxValue - 8.0) + 4.0) / 4.0;
         }
 
-        return { data, dims };
+        return features;
     }
 
     /**
@@ -1537,13 +1644,10 @@ export class WhisperFeatureExtractor extends FeatureExtractor {
             waveform.set(audio);
         }
 
-        const { data, dims } = this._extract_fbank_features(waveform);
+        const features = await this._extract_fbank_features(waveform);
 
         return {
-            input_features: new Tensor('float32',
-                data,
-                [1, ...dims]
-            )
+            input_features: features.unsqueeze_(0)
         };
     }
 }
@@ -1622,9 +1726,9 @@ export class SeamlessM4TFeatureExtractor extends FeatureExtractor {
      * Computes the log-Mel spectrogram of the provided audio waveform.
      * @param {Float32Array|Float64Array} waveform The audio waveform to process.
      * @param {number} max_length The maximum number of frames to return.
-     * @returns {{data: Float32Array, dims: number[]}} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
+     * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
      */
-    _extract_fbank_features(waveform, max_length) {
+    async _extract_fbank_features(waveform, max_length) {
         // NOTE: We don't pad/truncate since that is passed in as `max_num_frames`
 
         // Kaldi compliance: 16-bit signed integers
@@ -1671,28 +1775,29 @@ export class SeamlessM4TFeatureExtractor extends FeatureExtractor {
     } = {}) {
         validate_audio_inputs(audio, 'SeamlessM4TFeatureExtractor');
 
-        let features = this._extract_fbank_features(audio, this.config.max_length);
+        let features = await this._extract_fbank_features(audio, this.config.max_length);
 
         if (do_normalize_per_mel_bins) {
             const [num_features, feature_size] = features.dims;
+            const data = features.data;
             for (let i = 0; i < feature_size; ++i) {
                 let sum = 0;
                 for (let j = 0; j < num_features; ++j) {
-                    sum += features.data[j * feature_size + i];
+                    sum += data[j * feature_size + i];
                 }
 
                 const mean = sum / num_features;
 
                 let variance = 0;
                 for (let j = 0; j < num_features; ++j) {
-                    variance += (features.data[j * feature_size + i] - mean) ** 2;
+                    variance += (data[j * feature_size + i] - mean) ** 2;
                 }
                 variance /= num_features - 1; // NOTE: We use ddof=1
 
                 const std = Math.sqrt(variance + 1e-7);
                 for (let j = 0; j < num_features; ++j) {
                     const index = j * feature_size + i;
-                    features.data[index] = (features.data[index] - mean) / std;
+                    data[index] = (data[index] - mean) / std;
                 }
             }
         }
@@ -1700,18 +1805,20 @@ export class SeamlessM4TFeatureExtractor extends FeatureExtractor {
         let padded_attention_mask;
         if (padding) {
             const [num_frames, num_channels] = features.dims;
+            const data = /** @type {Float32Array} */(features.data);
 
             const pad_size = num_frames % pad_to_multiple_of;
             if (pad_size > 0) {
                 const padded_data = new Float32Array(num_channels * (num_frames + pad_size));
-                padded_data.set(features.data)
-                padded_data.fill(this.config.padding_value, features.data.length)
+                padded_data.set(data)
+                padded_data.fill(this.config.padding_value, data.length)
 
                 const numPaddedFrames = num_frames + pad_size;
-                features = {
-                    data: padded_data,
-                    dims: [numPaddedFrames, num_channels],
-                }
+                features = new Tensor(
+                    features.type,
+                    padded_data,
+                    [numPaddedFrames, num_channels],
+                )
 
                 if (return_attention_mask) {
                     padded_attention_mask = new Tensor(
@@ -1732,10 +1839,7 @@ export class SeamlessM4TFeatureExtractor extends FeatureExtractor {
             throw new Error(`The number of frames (${num_frames}) must be a multiple of the stride (${stride}).`)
         }
 
-        const input_features = new Tensor('float32',
-            features.data,
-            features.dims,
-        ).view(
+        const input_features = features.view(
             1,
             Math.floor(num_frames / stride),
             num_channels * stride,
@@ -1746,20 +1850,21 @@ export class SeamlessM4TFeatureExtractor extends FeatureExtractor {
         if (return_attention_mask) {
             const reshapedNumFrames = input_features.dims[1];
 
-            const attention_mask = new Tensor(
-                'int64',
-                new BigInt64Array(reshapedNumFrames),
-                [1, reshapedNumFrames],
-            );
+            const attention_mask_data = new BigInt64Array(reshapedNumFrames);
+
             if (padded_attention_mask) {
+                const padded_attention_mask_data = padded_attention_mask.data;
                 for (let i = 1, j = 0; i < num_frames; i += stride, ++j) {
-                    attention_mask.data[j] = padded_attention_mask.data[i];
+                    attention_mask_data[j] = padded_attention_mask_data[i];
                 }
             } else {
-                attention_mask.data.fill(1n);
+                attention_mask_data.fill(1n);
             }
-
-            result.attention_mask = attention_mask;
+            result.attention_mask = new Tensor(
+                'int64',
+                attention_mask_data,
+                [1, reshapedNumFrames],
+            );
         }
 
         return result;
@@ -1802,9 +1907,9 @@ export class ASTFeatureExtractor extends FeatureExtractor {
      * Computes the log-Mel spectrogram of the provided audio waveform.
      * @param {Float32Array|Float64Array} waveform The audio waveform to process.
      * @param {number} max_length The maximum number of frames to return.
-     * @returns {{data: Float32Array, dims: number[]}} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
+     * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
      */
-    _extract_fbank_features(waveform, max_length) {
+    async _extract_fbank_features(waveform, max_length) {
         // NOTE: We don't pad/truncate since that is passed in as `max_num_frames`
         return spectrogram(
             waveform,
@@ -1837,20 +1942,18 @@ export class ASTFeatureExtractor extends FeatureExtractor {
     async _call(audio) {
         validate_audio_inputs(audio, 'ASTFeatureExtractor');
 
-        const features = this._extract_fbank_features(audio, this.config.max_length);
+        const features = await this._extract_fbank_features(audio, this.config.max_length);
         if (this.config.do_normalize) {
             // Normalize the input audio spectrogram to have mean=0, std=0.5
             const denom = this.std * 2;
-            for (let i = 0; i < features.data.length; ++i) {
-                features.data[i] = (features.data[i] - this.mean) / denom;
+            const features_data = features.data;
+            for (let i = 0; i < features_data.length; ++i) {
+                features_data[i] = (features_data[i] - this.mean) / denom;
             }
         }
 
         return {
-            input_values: new Tensor('float32',
-                features.data,
-                [1, ...features.dims]
-            )
+            input_values: features.unsqueeze_(0)
         };
     }
 }
@@ -1903,11 +2006,12 @@ export class ClapFeatureExtractor extends FeatureExtractor {
      * @param {number} max_length The maximum length of the waveform.
      * @param {string} truncation The truncation strategy to use.
      * @param {string} padding The padding strategy to use.
-     * @returns {{ data: Float32Array; dims: number[]; longer: boolean; }} An object containing the mel spectrogram data as a Float32Array, its dimensions as an array of numbers, and a boolean indicating whether the waveform was longer than the max length.
+     * @returns {Promise<Tensor>} An object containing the mel spectrogram data as a Float32Array, its dimensions as an array of numbers, and a boolean indicating whether the waveform was longer than the max length.
+     * @private
      */
-    _get_input_mel(waveform, max_length, truncation, padding) {
+    async _get_input_mel(waveform, max_length, truncation, padding) {
 
-        /** @type {{ data: Float32Array; dims: number[]}} */
+        /** @type {Tensor} */
         let input_mel;
         let longer = false;
         const diff = waveform.length - max_length;
@@ -1917,8 +2021,7 @@ export class ClapFeatureExtractor extends FeatureExtractor {
                 const idx = Math.floor(Math.random() * (diff + 1));
                 waveform = waveform.subarray(idx, idx + max_length);
 
-                input_mel = this._extract_fbank_features(waveform, this.mel_filters_slaney, this.config.nb_max_samples);
-                input_mel.dims = [1, ...input_mel.dims]; // "unsqueeze"
+                input_mel = await this._extract_fbank_features(waveform, this.mel_filters_slaney, this.config.nb_max_samples);
             } else {
                 // TODO implement fusion strategy
                 throw new Error(`Truncation strategy "${truncation}" not implemented`)
@@ -1944,14 +2047,10 @@ export class ClapFeatureExtractor extends FeatureExtractor {
                 throw new Error(`Truncation strategy "${truncation}" not implemented`)
             }
 
-            input_mel = this._extract_fbank_features(waveform, this.mel_filters_slaney, this.config.nb_max_samples);
-            input_mel.dims = [1, ...input_mel.dims]; // "unsqueeze"
+            input_mel = await this._extract_fbank_features(waveform, this.mel_filters_slaney, this.config.nb_max_samples);
         }
 
-        return {
-            ...input_mel,
-            longer,
-        }
+        return input_mel.unsqueeze_(0);
     }
 
     /**
@@ -1967,9 +2066,9 @@ export class ClapFeatureExtractor extends FeatureExtractor {
      * @param {Float32Array|Float64Array} waveform The audio waveform to process.
      * @param {number[][]} mel_filters The mel filters to use.
      * @param {number} [max_length=null] The maximum number of frames to return.
-     * @returns {{data: Float32Array, dims: number[]}} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
+     * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
      */
-    _extract_fbank_features(waveform, mel_filters, max_length = null) {
+    async _extract_fbank_features(waveform, mel_filters, max_length = null) {
         // NOTE: We don't pad/truncate since that is passed in as `max_num_frames`
         return spectrogram(
             waveform,
@@ -2001,24 +2100,195 @@ export class ClapFeatureExtractor extends FeatureExtractor {
         validate_audio_inputs(audio, 'ClapFeatureExtractor');
 
         // convert to mel spectrogram, truncate and pad if needed.
-        const padded_inputs = this._get_input_mel(
+        const padded_inputs = await this._get_input_mel(
             audio,
             max_length ?? this.config.nb_max_samples,
             this.config.truncation,
             this.config.padding,
         );
 
+        return {
+            input_features: padded_inputs.unsqueeze_(0),
+        }
+    }
+}
+
+
+export class PyAnnoteFeatureExtractor extends FeatureExtractor {
+    /**
+     * Asynchronously extracts features from a given audio using the provided configuration.
+     * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
+     * @returns {Promise<{ input_values: Tensor; }>} The extracted input features.
+     */
+    async _call(audio) {
+        validate_audio_inputs(audio, 'PyAnnoteFeatureExtractor');
+
+        if (audio instanceof Float64Array) {
+            audio = new Float32Array(audio);
+        }
 
+        const shape = [
+            1,            /* batch_size */
+            1,            /* num_channels */
+            audio.length, /* num_samples */
+        ];
         return {
-            input_features: new Tensor('float32',
-                padded_inputs.data,
-                [1, ...padded_inputs.dims]
-            )
+            input_values: new Tensor('float32', audio, shape),
         };
     }
+
+    /**
+     * NOTE: Can return fractional values. `Math.ceil` will ensure correct value.
+     * @param {number} samples The number of frames in the audio.
+     * @returns {number} The number of frames in the audio.
+     */
+    samples_to_frames(samples) {
+        return ((samples - this.config.offset) / this.config.step);
+    }
+
+    /**
+     * Post-processes the speaker diarization logits output by the model.
+     * @param {Tensor} logits The speaker diarization logits output by the model.
+     * @param {number} num_samples Number of samples in the input audio.
+     * @returns {Array<Array<{ id: number, start: number, end: number, confidence: number }>>} The post-processed speaker diarization results.
+     */
+    post_process_speaker_diarization(logits, num_samples) {
+        const ratio = (
+            num_samples / this.samples_to_frames(num_samples)
+        ) / this.config.sampling_rate;
+
+        const results = [];
+        for (const scores of logits.tolist()) {
+            const accumulated_segments = [];
+
+            let current_speaker = -1;
+            for (let i = 0; i < scores.length; ++i) {
+                const probabilities = softmax(scores[i]);
+                const [score, id] = max(probabilities);
+                const [start, end] = [i, i + 1];
+
+                if (id !== current_speaker) {
+                    // Speaker has changed
+                    current_speaker = id;
+                    accumulated_segments.push({ id, start, end, score });
+                } else {
+                    // Continue the current segment
+                    accumulated_segments.at(-1).end = end;
+                    accumulated_segments.at(-1).score += score;
+                }
+            }
+
+            results.push(accumulated_segments.map(
+                // Convert frame-space to time-space
+                // and compute the confidence
+                ({ id, start, end, score }) => ({
+                    id,
+                    start: start * ratio,
+                    end: end * ratio,
+                    confidence: score / (end - start),
+                })
+            ));
+        }
+        return results;
+    }
+
 }
 
+export class WeSpeakerFeatureExtractor extends FeatureExtractor {
+
+    constructor(config) {
+        super(config);
+
+        const sampling_rate = this.config.sampling_rate;
+        const mel_filters = mel_filter_bank(
+            256, // num_frequency_bins
+            this.config.num_mel_bins, // num_mel_filters
+            20, // min_frequency
+            Math.floor(sampling_rate / 2), // max_frequency
+            sampling_rate, // sampling_rate
+            null, // norm
+            "kaldi", // mel_scale
+            true, // triangularize_in_mel_space
+        );
+
+        // Do padding:
+        for (let i = 0; i < mel_filters.length; ++i) {
+            mel_filters[i].push(0);
+        }
+        this.mel_filters = mel_filters;
+
+        this.window = window_function(400, 'hamming', {
+            periodic: false,
+        })
+        this.min_num_frames = this.config.min_num_frames;
+    }
+
+    /**
+     * Computes the log-Mel spectrogram of the provided audio waveform.
+     * @param {Float32Array|Float64Array} waveform The audio waveform to process.
+     * @returns {Promise<Tensor>} An object containing the log-Mel spectrogram data as a Float32Array and its dimensions as an array of numbers.
+     */
+    async _extract_fbank_features(waveform) {
+        // Kaldi compliance: 16-bit signed integers
+        // 32768 == 2 ** 15
+        waveform = waveform.map((/** @type {number} */ x) => x * 32768)
+
+        return spectrogram(
+            waveform,
+            this.window, // window
+            400, // frame_length
+            160, // hop_length
+            {
+                fft_length: 512,
+                power: 2.0,
+                center: false,
+                preemphasis: 0.97,
+                mel_filters: this.mel_filters,
+                log_mel: 'log',
+                mel_floor: 1.192092955078125e-07,
+                remove_dc_offset: true,
+
+                // Custom
+                transpose: true,
+                min_num_frames: this.min_num_frames,
+            }
+        )
+    }
+
+
+    /**
+     * Asynchronously extracts features from a given audio using the provided configuration.
+     * @param {Float32Array|Float64Array} audio The audio data as a Float32Array/Float64Array.
+     * @returns {Promise<{ input_features: Tensor }>} A Promise resolving to an object containing the extracted input features as a Tensor.
+     */
+    async _call(audio) {
+        validate_audio_inputs(audio, 'WeSpeakerFeatureExtractor');
+
+        const features = (await this._extract_fbank_features(audio)).unsqueeze_(0);
+
+        if (this.config.fbank_centering_span === null) {
+            // center features with global average
+            const meanData = /** @type {Float32Array} */ (features.mean(1).data);
+            const featuresData = /** @type {Float32Array} */(features.data);
+            const [batch_size, num_frames, feature_size] = features.dims;
+
+            for (let i = 0; i < batch_size; ++i) {
+                const offset1 = i * num_frames * feature_size;
+                const offset2 = i * feature_size;
+                for (let j = 0; j < num_frames; ++j) {
+                    const offset3 = offset1 + j * feature_size;
+                    for (let k = 0; k < feature_size; ++k) {
+                        featuresData[offset3 + k] -= meanData[offset2 + k];
+                    }
+                }
+            }
+        }
 
+        return {
+            input_features: features
+        };
+    }
+}
 
 export class SpeechT5FeatureExtractor extends FeatureExtractor { }
 
@@ -2099,6 +2369,23 @@ export class Wav2Vec2ProcessorWithLM extends Processor {
     }
 }
 
+export class PyAnnoteProcessor extends Processor {
+    /**
+     * Calls the feature_extractor function with the given audio input.
+     * @param {any} audio The audio input to extract features from.
+     * @returns {Promise<any>} A Promise that resolves with the extracted features.
+     */
+    async _call(audio) {
+        return await this.feature_extractor(audio)
+    }
+
+    post_process_speaker_diarization(...args) {
+        // @ts-ignore
+        return this.feature_extractor.post_process_speaker_diarization(...args);
+    }
+
+}
+
 export class SpeechT5Processor extends Processor {
     /**
      * Calls the feature_extractor function with the given input.
@@ -2112,6 +2399,110 @@ export class SpeechT5Processor extends Processor {
 
 export class OwlViTProcessor extends Processor { }
 
+export class Florence2Processor extends Processor {
+    constructor(feature_extractor) {
+        super(feature_extractor);
+
+        const {
+            tasks_answer_post_processing_type,
+            task_prompts_without_inputs,
+            task_prompts_with_input,
+        } = feature_extractor.config;
+
+        /** @type {Map<string, string>} */
+        this.tasks_answer_post_processing_type = new Map(Object.entries(tasks_answer_post_processing_type ?? {}));
+
+        /** @type {Map<string, string>} */
+        this.task_prompts_without_inputs = new Map(Object.entries(task_prompts_without_inputs ?? {}));
+
+        /** @type {Map<string, string>} */
+        this.task_prompts_with_input = new Map(Object.entries(task_prompts_with_input ?? {}));
+
+        this.regexes = {
+            quad_boxes: /(.+?)<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm,
+            bboxes: /([^<]+)?<loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/gm,
+        }
+        this.size_per_bin = 1000;
+    }
+
+    /**
+     * Helper function to construct prompts from input texts
+     * @param {string|string[]} text
+     * @returns {string[]}
+     */
+    construct_prompts(text) {
+        if (typeof text === 'string') {
+            text = [text];
+        }
+
+        const prompts = [];
+        for (const t of text) {
+            // 1. fixed task prompts without additional inputs
+            if (this.task_prompts_without_inputs.has(t)) {
+                prompts.push(this.task_prompts_without_inputs.get(t));
+            }
+            // 2. task prompts with additional inputs 
+            else {
+                for (const [task, prompt] of this.task_prompts_with_input) {
+                    if (t.includes(task)) {
+                        prompts.push(prompt.replaceAll('{input}', t).replaceAll(task, ''));
+                        break;
+                    }
+                }
+
+                // 3. default prompt
+                if (prompts.length !== text.length) {
+                    prompts.push(t);
+                }
+            }
+        }
+        return prompts;
+    }
+
+    /**
+     * Post-process the output of the model to each of the task outputs.
+     * @param {string} text The text to post-process.
+     * @param {string} task The task to post-process the text for.
+     * @param {[number, number]} image_size The size of the image. height x width.
+     */
+    post_process_generation(text, task, image_size) {
+        const task_answer_post_processing_type = this.tasks_answer_post_processing_type.get(task) ?? 'pure_text';
+
+        // remove the special tokens
+        text = text.replaceAll('<s>', '').replaceAll('</s>', '');
+
+        let final_answer;
+        switch (task_answer_post_processing_type) {
+            case 'pure_text':
+                final_answer = text;
+                break;
+
+            case 'description_with_bboxes':
+            case 'bboxes':
+            case 'phrase_grounding':
+            case 'ocr':
+                const key = task_answer_post_processing_type === 'ocr' ? 'quad_boxes' : 'bboxes';
+                const matches = text.matchAll(this.regexes[key]);
+                const labels = [];
+                const items = [];
+                for (const [_, label, ...locations] of matches) {
+                    // Push new label, or duplicate the last label
+                    labels.push(label ? label.trim() : labels.at(-1) ?? '');
+                    items.push(locations.map((x, i) =>
+                        // NOTE: Add 0.5 to use the center position of the bin as the coordinate.
+                        (Number(x) + 0.5) / this.size_per_bin * image_size[i % 2])
+                    );
+                }
+                final_answer = { labels, [key]: items };
+                break;
+
+            default:
+                throw new Error(`Task "${task}" (of type "${task_answer_post_processing_type}") not yet implemented.`);
+        }
+
+        return { [task]: final_answer }
+    }
+}
 
 //////////////////////////////////////////////////
 /**
@@ -2151,21 +2542,31 @@ export class AutoProcessor {
         ViTFeatureExtractor,
         MobileViTFeatureExtractor,
         MobileViTImageProcessor,
+        MobileNetV1FeatureExtractor,
+        MobileNetV2FeatureExtractor,
+        MobileNetV3FeatureExtractor,
+        MobileNetV4FeatureExtractor,
         OwlViTFeatureExtractor,
         Owlv2ImageProcessor,
         CLIPFeatureExtractor,
+        CLIPImageProcessor,
+        Florence2Processor,
         ChineseCLIPFeatureExtractor,
         SiglipImageProcessor,
         ConvNextFeatureExtractor,
         ConvNextImageProcessor,
         SegformerFeatureExtractor,
+        SapiensFeatureExtractor,
         BitImageProcessor,
         DPTImageProcessor,
         DPTFeatureExtractor,
+        PvtImageProcessor,
         GLPNFeatureExtractor,
         BeitFeatureExtractor,
         DeiTFeatureExtractor,
         DetrFeatureExtractor,
+        RTDetrImageProcessor,
+        MaskFormerFeatureExtractor,
         YolosFeatureExtractor,
         DonutFeatureExtractor,
         NougatImageProcessor,
@@ -2180,14 +2581,18 @@ export class AutoProcessor {
         SpeechT5FeatureExtractor,
         ASTFeatureExtractor,
         ClapFeatureExtractor,
+        PyAnnoteFeatureExtractor,
+        WeSpeakerFeatureExtractor,
     }
 
     static PROCESSOR_CLASS_MAPPING = {
         WhisperProcessor,
         Wav2Vec2ProcessorWithLM,
+        PyAnnoteProcessor,
         SamProcessor,
         SpeechT5Processor,
         OwlViTProcessor,
+        Florence2Processor,
     }
 
     /**
diff --git a/src/tokenizers.js b/src/tokenizers.js
index 234eef15e..5b4e0170c 100644
--- a/src/tokenizers.js
+++ b/src/tokenizers.js
@@ -5,7 +5,7 @@
  * **Example:** Create an `AutoTokenizer` and use it to tokenize a sentence.
  * This will automatically detect the tokenizer type based on the tokenizer class defined in `tokenizer.json`.
  * ```javascript
- * import { AutoTokenizer } from '@xenova/transformers';
+ * import { AutoTokenizer } from '@huggingface/transformers';
  * 
  * const tokenizer = await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased');
  * const { input_ids } = await tokenizer('I love transformers!');
@@ -19,13 +19,16 @@
  * 
  * @module tokenizers
  */
-
 import {
     Callable,
+} from './utils/generic.js';
+
+import {
     reverseDictionary,
     escapeRegExp,
     isIntegralNumber,
     mergeArrays,
+    len,
 } from './utils/core.js';
 
 import {
@@ -43,6 +46,11 @@ import {
 
 import { Template } from '@huggingface/jinja';
 
+import {
+    WHISPER_LANGUAGE_MAPPING,
+    whisper_language_to_code,
+} from './models/whisper/common_whisper.js';
+import { GITHUB_ISSUE_URL } from './utils/constants.js';
 
 /**
  * @typedef {Object} TokenizerProperties Additional tokenizer-specific properties.
@@ -188,7 +196,7 @@ function clean_up_tokenization(text) {
  * @returns {string} The text with accents removed.
  */
 function remove_accents(text) {
-    return text.replace(/[\u0300-\u036f]/g, '');
+    return text.replace(/\p{M}/gu, '');
 }
 
 /**
@@ -200,24 +208,55 @@ function lowercase_and_remove_accent(text) {
     return remove_accents(text.toLowerCase());
 }
 
+
+/**
+ * Checks whether the given Unicode codepoint represents a CJK (Chinese, Japanese, or Korean) character.
+ *
+ * A "chinese character" is defined as anything in the CJK Unicode block:
+ * https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
+ *
+ * Note that the CJK Unicode block is NOT all Japanese and Korean characters, despite its name.
+ * The modern Korean Hangul alphabet is a different block, as is Japanese Hiragana and Katakana.
+ * Those alphabets are used to write space-separated words, so they are not treated specially
+ * and are handled like all other languages.
+ *
+ * @param {number|bigint} cp The Unicode codepoint to check.
+ * @returns {boolean} True if the codepoint represents a CJK character, false otherwise.
+ */
+export function is_chinese_char(cp) {
+    return (
+        (cp >= 0x4E00 && cp <= 0x9FFF)
+        || (cp >= 0x3400 && cp <= 0x4DBF)
+        || (cp >= 0x20000 && cp <= 0x2A6DF)
+        || (cp >= 0x2A700 && cp <= 0x2B73F)
+        || (cp >= 0x2B740 && cp <= 0x2B81F)
+        || (cp >= 0x2B820 && cp <= 0x2CEAF)
+        || (cp >= 0xF900 && cp <= 0xFAFF)
+        || (cp >= 0x2F800 && cp <= 0x2FA1F)
+    )
+}
+
 /**
- * Helper function to fuse consecutive values in an array equal to the specified value.
- * @param {string[]} arr The input array
- * @param {any} value The value to fuse on.
- * @param {Map<string, any>} mapping The mapping from input domain to value.
+ * Helper function to fuse consecutive unknown tokens.
+ * @param {string[]} arr The list of input tokens
+ * @param {Map<string, any>} tokens_to_ids The mapping from tokens to token ids.
+ * @param {number} unk_token_id The value to fuse on.
+ * @private
  */
-function fuse(arr, value, mapping) {
+function fuse_unk(arr, tokens_to_ids, unk_token_id) {
     const fused = [];
     let i = 0;
     while (i < arr.length) {
         fused.push(arr[i])
-        if ((mapping.get(arr[i]) ?? value) !== value) {
+        if ((tokens_to_ids.get(arr[i]) ?? unk_token_id) !== unk_token_id) {
             ++i;
             continue;
         }
 
-        while (i < arr.length && (mapping.get(arr[i]) ?? value) === value) {
-            ++i;
+        while (++i < arr.length && (tokens_to_ids.get(arr[i]) ?? unk_token_id) === unk_token_id) {
+            if (tokens_to_ids.get(fused.at(-1)) !== unk_token_id) {
+                fused[fused.length - 1] += arr[i];
+            }
         }
     }
 
@@ -234,12 +273,18 @@ function whitespace_split(text) {
 }
 
 const PUNCTUATION_REGEX = '\\p{P}\\u0021-\\u002F\\u003A-\\u0040\\u005B-\\u0060\\u007B-\\u007E';
+const PUNCTUATION_ONLY_REGEX = new RegExp(`^[${PUNCTUATION_REGEX}]+$`, 'gu');
+const BLOOM_SPLIT_CHARS = '.,!?\u2026\u3002\uff0c\u3001\u0964\u06d4\u060c';
 
-// A mapping of regex patterns to their equivalent (but longer) JS-compatible versions.
+// A mapping of regex patterns to their equivalent (but possibly longer) JS-compatible versions.
 const PROBLEMATIC_REGEX_MAP = new Map([
     // This uses the case insensitive group modifier, which is not supported in JavaScript.
     // When parsing the regex, an "Invalid group" error is thrown.
     ["(?i:'s|'t|'re|'ve|'m|'ll|'d)", "(?:'([sS]|[tT]|[rR][eE]|[vV][eE]|[mM]|[lL][lL]|[dD]))"],
+
+    // Used to override the default (invalid) regex of the bloom pretokenizer.
+    // For more information, see https://github.com/huggingface/transformers.js/issues/94
+    [` ?[^(\\s|[${BLOOM_SPLIT_CHARS}])]+`, ` ?[^\\s${BLOOM_SPLIT_CHARS}]+`],
 ])
 
 
@@ -317,14 +362,21 @@ export class TokenizerModel extends Callable {
             case 'Unigram':
                 // @ts-ignore
                 return new Unigram(config, ...args);
-
             case 'BPE':
                 return new BPE(config);
 
             default:
+                // Some tokenizers, like for google-t5/t5-small, do not have a `type` field.
+                // In this case, we can infer the tokenizer type based on the structure of the `vocab` field.
                 if (config.vocab) {
-                    // @ts-ignore
-                    return new LegacyTokenizerModel(config, ...args);
+                    if (Array.isArray(config.vocab)) {
+                        // config.vocab is of type `[string, number][]`
+                        // @ts-ignore
+                        return new Unigram(config, ...args);
+                    } else {
+                        // @ts-ignore
+                        return new LegacyTokenizerModel(config, ...args);
+                    }
                 }
                 throw new Error(`Unknown TokenizerModel type: ${config.type}`);
         }
@@ -333,15 +385,15 @@ export class TokenizerModel extends Callable {
     /**
      * Internal function to call the TokenizerModel instance.
      * @param {string[]} tokens The tokens to encode.
-     * @returns {string[]} The encoded token IDs.
+     * @returns {string[]} The encoded tokens.
      */
     _call(tokens) {
-        let ids = this.encode(tokens);
+        tokens = this.encode(tokens);
         if (this.fuse_unk) {
             // Fuse unknown tokens
-            ids = fuse(ids, this.unk_token_id, this.tokens_to_ids);
+            tokens = fuse_unk(tokens, this.tokens_to_ids, this.unk_token_id);
         }
-        return ids;
+        return tokens;
     }
 
     /**
@@ -365,7 +417,7 @@ export class TokenizerModel extends Callable {
 
     /**
      * Converts a list of token IDs into a list of tokens.
-     * @param {number[]} ids The token IDs to convert.
+     * @param {number[]|bigint[]} ids The token IDs to convert.
      * @returns {string[]} The converted tokens.
      */
     convert_ids_to_tokens(ids) {
@@ -502,18 +554,18 @@ class Unigram extends TokenizerModel {
         this.unk_token = this.vocab[config.unk_id];
 
         this.tokens_to_ids = new Map(this.vocab.map((x, i) => [x, i]));
-        this.bosToken = ' '; // beginning of a sentence token
+        this.bos_token = ' '; // beginning of a sentence token
 
-        this.bosTokenId = this.tokens_to_ids.get(this.bosToken); // NOTE: may be undefined
-        this.eosToken = moreConfig.eos_token;
+        this.bos_token_id = this.tokens_to_ids.get(this.bos_token); // NOTE: may be undefined
+        this.eos_token = moreConfig.eos_token;
 
-        this.eosTokenId = this.tokens_to_ids.get(this.eosToken);
-        this.unkToken = this.vocab[this.unk_token_id];
+        this.eos_token_id = this.tokens_to_ids.get(this.eos_token);
+        this.unk_token = this.vocab[this.unk_token_id];
 
         this.minScore = min(this.scores)[0];
 
-        this.unkScore = this.minScore - 10.0;
-        this.scores[this.unk_token_id] = this.unkScore;
+        this.unk_score = this.minScore - 10.0;
+        this.scores[this.unk_token_id] = this.unk_score;
 
         this.trie = new CharTrie();
         this.trie.extend(this.vocab);
@@ -528,26 +580,27 @@ class Unigram extends TokenizerModel {
      * @param {TokenLattice} lattice The token lattice to populate with nodes.
      */
     populateNodes(lattice) {
-        const sentence = lattice.sentence;
-        const len = sentence.length;
+        const chars = lattice.chars;
+        const mblen = 1;
         let beginPos = 0;
-        while (beginPos < len) {
-            const mblen = 1;
+        while (beginPos < chars.length) {
             let hasSingleNode = false;
-            const tokens = [];
 
-            for (let token of this.trie.commonPrefixSearch(sentence.slice(beginPos))) {
+            const tokens = [];
+            const sliced = chars.slice(beginPos).join('');
+            const prefixedTokens = this.trie.commonPrefixSearch(sliced);
+            for (const token of prefixedTokens) {
                 tokens.push(token);
                 const tokenId = this.tokens_to_ids.get(token);
                 const tokenScore = this.scores[tokenId];
-                const n = token.length;
+                const n = len(token);
                 lattice.insert(beginPos, n, tokenScore, tokenId);
                 if (!hasSingleNode && n === mblen) {
                     hasSingleNode = true;
                 }
             }
             if (!hasSingleNode) {
-                lattice.insert(beginPos, mblen, this.unkScore, this.unk_token_id);
+                lattice.insert(beginPos, mblen, this.unk_score, this.unk_token_id);
             }
             beginPos += mblen;
         }
@@ -560,7 +613,7 @@ class Unigram extends TokenizerModel {
      * @returns {string[]} An array of subtokens obtained by encoding the input tokens using the unigram model.
      */
     tokenize(normalized) {
-        const lattice = new TokenLattice(normalized, this.bosTokenId, this.eosTokenId);
+        const lattice = new TokenLattice(normalized, this.bos_token_id, this.eos_token_id);
         this.populateNodes(lattice);
         return lattice.tokens();
     }
@@ -630,7 +683,7 @@ class BPE extends TokenizerModel {
      * Create a BPE instance.
      * @param {Object} config The configuration object for BPE.
      * @param {Object} config.vocab A mapping of tokens to ids.
-     * @param {string[]} config.merges An array of BPE merges as strings.
+     * @param {string[]|[string, string][]} config.merges An array of BPE merges as strings.
      * @param {string} config.unk_token The unknown token used for out of vocabulary words.
      * @param {string} config.end_of_word_suffix The suffix to place at the end of each word.
      * @param {string} [config.continuing_subword_suffix] The suffix to insert between words.
@@ -640,8 +693,6 @@ class BPE extends TokenizerModel {
     constructor(config) {
         super(config);
 
-        this.BPE_SPLIT_TOKEN = ' ';
-
         /** @type {Map<string, number>} */
         this.tokens_to_ids = objectToMap(config.vocab);
 
@@ -653,8 +704,15 @@ class BPE extends TokenizerModel {
             this.vocab[value] = key;
         }
 
-        this.bpe_ranks = new Map(config.merges.map((x, i) => [x, i]));
-        this.merges = config.merges.map(x => x.split(this.BPE_SPLIT_TOKEN));
+        // Tokenizers >= 0.20.0 serializes BPE merges as a [string, string][] instead of a string[],
+        // which resolves the ambiguity for merges containing spaces.
+        const use_new_merge_format = Array.isArray(config.merges[0]);
+
+        /** @type {[string, string][]} */
+        this.merges = use_new_merge_format
+            ? /** @type {[string, string][]} */(config.merges)
+            : (/** @type {string[]} */(config.merges)).map(x => /** @type {[string, string]} */(x.split(' ', 2)));
+        this.bpe_ranks = new Map(this.merges.map((x, i) => [JSON.stringify(x), i]));
 
         this.end_of_word_suffix = config.end_of_word_suffix;
 
@@ -814,7 +872,7 @@ class BPE extends TokenizerModel {
         // `score` is a measure of the merge priority: lower means higher priority
         // We use the BPE rank as a measure of priority (i.e., the local of the merge in the merges list)
         // We also add a fractional component to the score to break ties (with the earlier character having higher priority)
-        const rank = this.bpe_ranks.get(node.token + this.BPE_SPLIT_TOKEN + node.next.token);
+        const rank = this.bpe_ranks.get(JSON.stringify([node.token, node.next.token]));
         if (rank !== undefined) {
             node.score = rank + node.bias;
             queue.push(node);
@@ -839,15 +897,19 @@ class BPE extends TokenizerModel {
             for (const t of bpe_token_list) {
                 if (this.tokens_to_ids.has(t)) {
                     outputTokens.push(t);
-                } else {
-                    if (this.byte_fallback) {
-                        outputTokens.push(
-                            ...Array.from(this.text_encoder.encode(t))
-                                .map(x => `<0x${x.toString(16).toUpperCase().padStart(2, '0')}>`)
-                        );
+                } else if (this.byte_fallback) {
+                    const byteTokens = Array.from(this.text_encoder.encode(t))
+                        .map(x => `<0x${x.toString(16).toUpperCase().padStart(2, '0')}>`);
+                    if (byteTokens.every(x => this.tokens_to_ids.has(x))) {
+                        // Ensure the byte tokens are actually in the vocabulary, otherwise
+                        // we fall back to the unknown token. For more information, see
+                        // https://github.com/huggingface/transformers/issues/28096.
+                        outputTokens.push(...byteTokens);
                     } else {
                         outputTokens.push(this.unk_token);
                     }
+                } else {
+                    outputTokens.push(this.unk_token);
                 }
             }
         }
@@ -1154,7 +1216,7 @@ class BertNormalizer extends Normalizer {
         for (let i = 0; i < text.length; ++i) {
             const char = text[i];
             const cp = char.charCodeAt(0);
-            if (this._is_chinese_char(cp)) {
+            if (is_chinese_char(cp)) {
                 output.push(" ");
                 output.push(char);
                 output.push(" ");
@@ -1165,39 +1227,14 @@ class BertNormalizer extends Normalizer {
         return output.join("");
     }
 
-    /**
-     * Checks whether the given Unicode codepoint represents a CJK (Chinese, Japanese, or Korean) character.
-     *
-     * A "chinese character" is defined as anything in the CJK Unicode block:
-     * https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
-     *
-     * Note that the CJK Unicode block is NOT all Japanese and Korean characters, despite its name.
-     * The modern Korean Hangul alphabet is a different block, as is Japanese Hiragana and Katakana.
-     * Those alphabets are used to write space-separated words, so they are not treated specially
-     * and are handled like all other languages.
-     *
-     * @param {number} cp The Unicode codepoint to check.
-     * @returns {boolean} True if the codepoint represents a CJK character, false otherwise.
-     */
-    _is_chinese_char(cp) {
-        return (
-            (cp >= 0x4E00 && cp <= 0x9FFF)
-            || (cp >= 0x3400 && cp <= 0x4DBF)
-            || (cp >= 0x20000 && cp <= 0x2A6DF)
-            || (cp >= 0x2A700 && cp <= 0x2B73F)
-            || (cp >= 0x2B740 && cp <= 0x2B81F)
-            || (cp >= 0x2B820 && cp <= 0x2CEAF)
-            || (cp >= 0xF900 && cp <= 0xFAFF)
-            || (cp >= 0x2F800 && cp <= 0x2FA1F)
-        )
-    }
     /**
      * Strips accents from the given text.
      * @param {string} text The text to strip accents from.
      * @returns {string} The text with accents removed.
      */
     stripAccents(text) {
-        return text.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
+        // "Mark, Nonspacing" (Mn)
+        return text.normalize('NFD').replace(/\p{Mn}/gu, '');
     }
 
 
@@ -2315,7 +2352,7 @@ class Precompiled extends Normalizer {
         // TODO: detect when a different `this.charsmap` is used.
 
         text = text.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm, ''); // Remove control characters
-        text = text.replace(/[\u0009\u000A\u000C\u000D\u1680\u200B\u200C\u200E\u200F\u2028\u2029\u2581\uFEFF\uFFFD]/gm, '\u0020'); // Replace certain characters with a space
+        text = text.replace(/[\u0009\u000A\u000C\u000D\u00A0\u1680\u2000-\u200F\u2028\u2029\u202F\u205F\u2581\u3000\uFEFF\uFFFD]/gm, '\u0020'); // Replace certain characters with a space
 
         if (text.includes('\uFF5E')) {
             // To match the sentencepiece implementation 100%, we must handle a very strange edge-case.
@@ -2452,7 +2489,7 @@ const SPECIAL_TOKEN_ATTRIBUTES = [
  * @param {Record<string, any[]>} item The input object.
  * @param {number} length The length to pad to.
  * @param {(key: string) => any} value_fn Determine the value to fill the array, based on its key.
- * @param {'right'|'left'} side Which side to pad the array.
+ * @param {string} side Which side to pad the array.
  * @private
  */
 function padHelper(item, length, value_fn, side) {
@@ -2492,8 +2529,7 @@ function truncateHelper(item, length) {
 export class PreTrainedTokenizer extends Callable {
     return_token_type_ids = false;
 
-    _default_chat_template = `{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}`;
-
+    padding_side = 'right';
     /**
      * Create a new PreTrainedTokenizer instance.
      * @param {Object} tokenizerJSON The JSON of the tokenizer.
@@ -2541,14 +2577,17 @@ export class PreTrainedTokenizer extends Callable {
 
             // Another slight hack to add `end_of_word_suffix` (if present) to the decoder
             // This is needed for cases where BPE model and ByteLevel decoder are used
-            // For more information, see https://github.com/xenova/transformers.js/issues/74
+            // For more information, see https://github.com/huggingface/transformers.js/issues/74
             // TODO: save this to the decoder when exporting?
             this.decoder.end_of_word_suffix = this.model.end_of_word_suffix;
         }
 
-
         this.added_tokens_regex = this.added_tokens.length > 0 ? new RegExp(
-            this.added_tokens.map(x => `${x.lstrip ? '\\s*' : ''}(${escapeRegExp(x.content)})${x.rstrip ? '\\s*' : ''}`).join('|')
+            this.added_tokens.slice()
+                // Sort by length (desc) to avoid early partial matches
+                .sort((a, b) => b.content.length - a.content.length)
+                .map(x => `${x.lstrip ? '\\s*' : ''}(${escapeRegExp(x.content)})${x.rstrip ? '\\s*' : ''}`)
+                .join('|')
         ) : null;
 
         // Set mask token if present (otherwise will be undefined, which is fine)
@@ -2572,9 +2611,9 @@ export class PreTrainedTokenizer extends Callable {
         this.clean_up_tokenization_spaces = tokenizerConfig.clean_up_tokenization_spaces ?? true;
         this.do_lowercase_and_remove_accent = tokenizerConfig.do_lowercase_and_remove_accent ?? false;
 
-        // TODO allow user to change this
-        /** @type {'right'|'left'} */
-        this.padding_side = 'right';
+        if (tokenizerConfig.padding_side) {
+            this.padding_side = tokenizerConfig.padding_side;
+        }
 
         this.legacy = false;
 
@@ -2599,6 +2638,7 @@ export class PreTrainedTokenizer extends Callable {
      * @param {...string} keys One or more keys to search for in the tokenizer config object.
      * @returns {string|null} The value associated with the first matching key, or null if no match is found.
      * @throws {Error} If an object is found for a matching key and its __type property is not "AddedToken".
+     * @private
      */
     getToken(...keys) {
         for (const key of keys) {
@@ -2707,11 +2747,11 @@ export class PreTrainedTokenizer extends Callable {
                 }
 
                 encodedTokens = text.map(
-                    (t, i) => this._encode_plus(t, text_pair[i], { add_special_tokens, return_token_type_ids })
+                    (t, i) => this._encode_plus(t, { text_pair: text_pair[i], add_special_tokens, return_token_type_ids })
                 )
 
             } else {
-                encodedTokens = text.map(x => this._encode_plus(x, null, { add_special_tokens, return_token_type_ids }));
+                encodedTokens = text.map(x => this._encode_plus(x, { add_special_tokens, return_token_type_ids }));
             }
 
         } else {
@@ -2724,7 +2764,7 @@ export class PreTrainedTokenizer extends Callable {
             }
 
             // For single input, we just wrap in an array, and then unwrap later.
-            encodedTokens = [this._encode_plus(text, text_pair, { add_special_tokens, return_token_type_ids })];
+            encodedTokens = [this._encode_plus(text, { text_pair, add_special_tokens, return_token_type_ids })];
         }
         // At this point, tokens is batched: [batch_size, tokens]
         // However, array may be jagged. So, we pad to max_length
@@ -2743,7 +2783,7 @@ export class PreTrainedTokenizer extends Callable {
         }
 
         // Ensure it is less than model max length
-        max_length = Math.min(max_length, this.model_max_length)
+        max_length = Math.min(max_length, this.model_max_length ?? Infinity);
 
         if (padding || truncation) {
 
@@ -2879,56 +2919,88 @@ export class PreTrainedTokenizer extends Callable {
      * Encodes a single text or a pair of texts using the model's tokenizer.
      *
      * @param {string} text The text to encode.
-     * @param {string|null} text_pair The optional second text to encode.
      * @param {Object} options An optional object containing the following properties:
+     * @param {string} [options.text_pair=null] The optional second text to encode.
      * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model.
      * @param {boolean} [options.return_token_type_ids=null] Whether to return token_type_ids.
      * @returns {EncodingSingle} An object containing the encoded text.
      * @private
      */
-    _encode_plus(text, text_pair = null, {
+    _encode_plus(text, {
+        text_pair = null,
         add_special_tokens = true,
         return_token_type_ids = null,
     } = {}) {
-        // Function called by users to encode possibly multiple texts
-        const tokens = this._encode_text(text);
-        const tokens2 = this._encode_text(text_pair);
 
-        const combinedTokens = this.post_processor
-            ? this.post_processor(tokens, tokens2, { add_special_tokens })
-            : { tokens: mergeArrays(tokens ?? [], tokens2 ?? []) };
+        const { tokens, token_type_ids } = this._tokenize_helper(text, { pair: text_pair, add_special_tokens });
 
-        const input_ids = this.model.convert_tokens_to_ids(combinedTokens.tokens);
+        const input_ids = this.model.convert_tokens_to_ids(tokens);
 
         const result = {
             input_ids,
             attention_mask: new Array(input_ids.length).fill(1),
         }
-        if ((return_token_type_ids ?? this.return_token_type_ids) && combinedTokens.token_type_ids) {
-            result.token_type_ids = combinedTokens.token_type_ids;
+        if ((return_token_type_ids ?? this.return_token_type_ids) && token_type_ids) {
+            result.token_type_ids = token_type_ids;
         }
         return result;
     }
 
+    /**
+     * Internal helper function to tokenize a text, and optionally a pair of texts.
+     * @param {string} text The text to tokenize.
+     * @param {Object} options An optional object containing the following properties:
+     * @param {string} [options.pair=null] The optional second text to tokenize.
+     * @param {boolean} [options.add_special_tokens=false] Whether or not to add the special tokens associated with the corresponding model.
+     * @returns {{tokens: string[], token_type_ids?: number[]}} An object containing the tokens and optionally the token type IDs.
+     */
+    _tokenize_helper(text, {
+        pair = null,
+        add_special_tokens = false,
+    } = {}) {
+        const tokens = this._encode_text(text);
+        const tokens2 = this._encode_text(pair);
+
+        return this.post_processor
+            ? this.post_processor(tokens, tokens2, { add_special_tokens })
+            : { tokens: mergeArrays(tokens ?? [], tokens2 ?? []) };
+    }
+
+    /**
+     * Converts a string into a sequence of tokens.
+     * @param {string} text The sequence to be encoded.
+     * @param {Object} options An optional object containing the following properties:
+     * @param {string} [options.pair] A second sequence to be encoded with the first.
+     * @param {boolean} [options.add_special_tokens=false] Whether or not to add the special tokens associated with the corresponding model.
+     * @returns {string[]} The list of tokens.
+     */
+    tokenize(text, {
+        pair = null,
+        add_special_tokens = false,
+    } = {}) {
+        return this._tokenize_helper(text, { pair, add_special_tokens }).tokens;
+    }
+
     /**
      * Encodes a single text or a pair of texts using the model's tokenizer.
      *
      * @param {string} text The text to encode.
-     * @param {string|null} text_pair The optional second text to encode.
      * @param {Object} options An optional object containing the following properties:
+     * @param {string} [options.text_pair=null] The optional second text to encode.
      * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model.
      * @param {boolean} [options.return_token_type_ids=null] Whether to return token_type_ids.
      * @returns {number[]} An array of token IDs representing the encoded text(s).
      */
-    encode(text, text_pair = null, {
+    encode(text, {
+        text_pair = null,
         add_special_tokens = true,
         return_token_type_ids = null,
     } = {}) {
-        const { input_ids } = this._encode_plus(text, text_pair, {
+        return this._encode_plus(text, {
+            text_pair,
             add_special_tokens,
             return_token_type_ids,
-        });
-        return input_ids;
+        }).input_ids;
     }
 
     /**
@@ -2947,7 +3019,7 @@ export class PreTrainedTokenizer extends Callable {
     /**
      * Decodes a sequence of token IDs back to a string.
      *
-     * @param {number[]|Tensor} token_ids List/Tensor of token IDs to decode.
+     * @param {number[]|bigint[]|Tensor} token_ids List/Tensor of token IDs to decode.
      * @param {Object} [decode_args={}]
      * @param {boolean} [decode_args.skip_special_tokens=false] If true, special tokens are removed from the output string.
      * @param {boolean} [decode_args.clean_up_tokenization_spaces=true] If true, spaces before punctuations and abbreviated forms are removed.
@@ -2972,7 +3044,7 @@ export class PreTrainedTokenizer extends Callable {
 
     /**
      * Decode a single list of token ids to a string.
-     * @param {number[]} token_ids List of token ids to decode
+     * @param {number[]|bigint[]} token_ids List of token ids to decode
      * @param {Object} decode_args Optional arguments for decoding
      * @param {boolean} [decode_args.skip_special_tokens=false] Whether to skip special tokens during decoding
      * @param {boolean} [decode_args.clean_up_tokenization_spaces=null] Whether to clean up tokenization spaces during decoding.
@@ -3012,32 +3084,77 @@ export class PreTrainedTokenizer extends Callable {
         return decoded;
     }
 
-    get default_chat_template() {
-        if (!this._warned_about_chat_template) {
-            console.warn(
-                "No chat template is defined for this tokenizer - using a default chat template " +
-                "that implements the ChatML format. If the default is not appropriate for " +
-                "your model, please set `tokenizer.chat_template` to an appropriate template. " +
-                "See https://huggingface.co/docs/transformers/main/chat_templating for more information."
-            )
-            this._warned_about_chat_template = true; // TODO move to logger.warning_once()
-        }
+    /**
+     * Retrieve the chat template string used for tokenizing chat messages. This template is used
+     * internally by the `apply_chat_template` method and can also be used externally to retrieve the model's chat
+     * template for better generation tracking.
+     * 
+     * @param {Object} options An optional object containing the following properties:
+     * @param {string} [options.chat_template=null]
+     * A Jinja template or the name of a template to use for this conversion.
+     * It is usually not necessary to pass anything to this argument,
+     * as the model's template will be used by default.
+     * @param {Object[]} [options.tools=null]
+     * A list of tools (callable functions) that will be accessible to the model. If the template does not
+     * support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
+     * giving the name, description and argument types for the tool. See our
+     * [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use)
+     * for more information.
+     * @returns {string} The chat template string.
+     */
+    get_chat_template({
+        chat_template = null,
+        tools = null,
+    } = {}) {
 
-        return this._default_chat_template;
+        // First, handle the cases when the model has a dict of multiple templates
+        if (this.chat_template && typeof this.chat_template === 'object') {
+            const template_dict = this.chat_template;
+
+            if (chat_template !== null && Object.hasOwn(template_dict, chat_template)) {
+                // The user can pass the name of a template to the chat template argument instead of an entire template
+                chat_template = template_dict[chat_template];
+            } else if (chat_template === null) {
+                if (tools !== null && 'tool_use' in template_dict) {
+                    chat_template = template_dict['tool_use'];
+                } else if ('default' in template_dict) {
+                    chat_template = template_dict['default'];
+                } else {
+                    throw Error(
+                        `This model has multiple chat templates with no default specified! Please either pass a chat ` +
+                        `template or the name of the template you wish to use to the 'chat_template' argument. Available ` +
+                        `template names are ${Object.keys(template_dict).sort()}.`
+                    )
+                }
+            }
+        } else if (chat_template === null) {
+            // These are the cases when the model has a single template
+            // priority: `chat_template` argument > `tokenizer.chat_template`
+            if (this.chat_template) {
+                chat_template = this.chat_template;
+            } else {
+                throw Error(
+                    "Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template " +
+                    "argument was passed! For information about writing templates and setting the " +
+                    "tokenizer.chat_template attribute, please see the documentation at " +
+                    "https://huggingface.co/docs/transformers/main/en/chat_templating"
+                )
+            }
+        }
+        return chat_template;
     }
 
     /**
      * Converts a list of message objects with `"role"` and `"content"` keys to a list of token
      * ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
-     * determine the format and control tokens to use when converting. When chat_template is None, it will fall back
-     * to the default_chat_template specified at the class level.
+     * determine the format and control tokens to use when converting.
      * 
      * See [here](https://huggingface.co/docs/transformers/chat_templating) for more information.
      * 
      * **Example:** Applying a chat template to a conversation.
      * 
      * ```javascript
-     * import { AutoTokenizer } from "@xenova/transformers";
+     * import { AutoTokenizer } from "@huggingface/transformers";
      * 
      * const tokenizer = await AutoTokenizer.from_pretrained("Xenova/mistral-tokenizer-v1");
      * 
@@ -3054,10 +3171,23 @@ export class PreTrainedTokenizer extends Callable {
      * // [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]
      * ```
      * 
-     * @param {Message[]} conversation A list of message objects with `"role"` and `"content"` keys.
+     * @param {Message[]} conversation A list of message objects with `"role"` and `"content"` keys,
+     * representing the chat history so far.
      * @param {Object} options An optional object containing the following properties:
      * @param {string} [options.chat_template=null] A Jinja template to use for this conversion. If
-     * this is not passed, the model's default chat template will be used instead.
+     * this is not passed, the model's chat template will be used instead.
+     * @param {Object[]} [options.tools=null]
+     * A list of tools (callable functions) that will be accessible to the model. If the template does not
+     * support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
+     * giving the name, description and argument types for the tool. See our
+     * [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use)
+     * for more information.
+     * @param {Record<string, string>[]} [options.documents=null]
+     * A list of dicts representing documents that will be accessible to the model if it is performing RAG
+     * (retrieval-augmented generation). If the template does not support RAG, this argument will have no
+     * effect. We recommend that each document should be a dict containing "title" and "text" keys. Please
+     * see the RAG section of the [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#arguments-for-RAG)
+     * for examples of passing documents with chat templates.
      * @param {boolean} [options.add_generation_prompt=false] Whether to end the prompt with the token(s) that indicate
      * the start of an assistant message. This is useful when you want to generate a response from the model.
      * Note that this argument will be passed to the chat template, and so it must be supported in the
@@ -3068,10 +3198,13 @@ export class PreTrainedTokenizer extends Callable {
      * @param {number} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false.
      * If not specified, the tokenizer's `max_length` attribute will be used as a default.
      * @param {boolean} [options.return_tensor=true] Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false.
+     * @param {boolean} [options.return_dict=true] Whether to return a dictionary with named outputs. Has no effect if tokenize is false.
      * @param {Object} [options.tokenizer_kwargs={}] Additional options to pass to the tokenizer.
-     * @returns {string | Tensor | number[]| number[][]} The tokenized output.
+     * @returns {string | Tensor | number[]| number[][]|BatchEncoding} The tokenized output.
      */
     apply_chat_template(conversation, {
+        tools = null,
+        documents = null,
         chat_template = null,
         add_generation_prompt = false,
         tokenize = true,
@@ -3079,34 +3212,13 @@ export class PreTrainedTokenizer extends Callable {
         truncation = false,
         max_length = null,
         return_tensor = true,
+        return_dict = false,
         tokenizer_kwargs = {},
         ...kwargs
     } = {}) {
 
-        // First, handle the cases when the model has a dict of multiple templates
-        if (
-            (this.chat_template && typeof this.chat_template === 'object') ||
-            (this.chat_template === null && this.default_chat_template && typeof this.default_chat_template === 'object')
-        ) {
-            const template_dict = this.chat_template ?? this.default_chat_template; // Guaranteed to be a non-null object
+        chat_template = this.get_chat_template({ chat_template, tools });
 
-            if (chat_template !== null && Object.hasOwn(template_dict, chat_template)) {
-                // The user can pass the name of a template to the chat template argument instead of an entire template
-                chat_template = template_dict[chat_template];
-            } else if (chat_template === null && 'default' in template_dict) {
-                chat_template = template_dict['default'];
-            } else if (chat_template === null) {
-                throw Error(
-                    `This model has multiple chat templates with no default specified! Please either pass a chat ` +
-                    `template or the name of the template you wish to use to the 'chat_template' argument. Available ` +
-                    `template names are ${Object.keys(template_dict).sort()}.`
-                )
-            }
-        } else {
-            // These are the cases when the model has a single template
-            // priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template
-            chat_template ??= this.chat_template ?? this.default_chat_template;
-        }
         if (typeof chat_template !== 'string') {
             throw Error(`chat_template must be a string, but got ${typeof chat_template}`);
         }
@@ -3128,21 +3240,23 @@ export class PreTrainedTokenizer extends Callable {
 
         const rendered = compiledTemplate.render({
             messages: conversation,
-            add_generation_prompt: add_generation_prompt,
-
+            add_generation_prompt,
+            tools,
+            documents,
             ...special_tokens_map,
             ...kwargs,
         });
 
         if (tokenize) {
-            return this._call(rendered, {
+            const out = this._call(rendered, {
                 add_special_tokens: false,
                 padding,
                 truncation,
                 max_length,
                 return_tensor,
                 ...tokenizer_kwargs,
-            }).input_ids;
+            });
+            return return_dict ? out : out.input_ids;
         }
 
         return rendered;
@@ -3199,9 +3313,7 @@ export class ElectraTokenizer extends PreTrainedTokenizer {
 }
 
 export class T5Tokenizer extends PreTrainedTokenizer { }
-export class GPT2Tokenizer extends PreTrainedTokenizer {
-    _default_chat_template = `{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}`
-}
+export class GPT2Tokenizer extends PreTrainedTokenizer { }
 export class BartTokenizer extends PreTrainedTokenizer { }
 export class MBartTokenizer extends PreTrainedTokenizer {
     constructor(tokenizerJSON, tokenizerConfig) {
@@ -3227,35 +3339,16 @@ export class MBart50Tokenizer extends MBartTokenizer { } // NOTE: extends MBartT
 
 export class RobertaTokenizer extends PreTrainedTokenizer { }
 
-export class BloomTokenizer extends GPT2Tokenizer { // NOTE: `GPT2Tokenizer` to get the correct chat template
-
-    constructor(tokenizerJSON, tokenizerConfig) {
-        // Override the default (invalid) regex of the pretokenizer.
-        // For more information, see https://github.com/xenova/transformers.js/issues/94
-        const splitChars = '.,!?\u2026\u3002\uff0c\u3001\u0964\u06d4\u060c';
-        const patternObject = tokenizerJSON.pre_tokenizer?.pretokenizers[0]?.pattern;
-        if (patternObject && patternObject.Regex === ` ?[^(\\s|[${splitChars}])]+`) {
-            patternObject.Regex = ` ?[^\\s${splitChars}]+`;
-        }
-        super(tokenizerJSON, tokenizerConfig);
-    }
-}
+export class BloomTokenizer extends PreTrainedTokenizer { }
 
 const SPIECE_UNDERLINE = "▁";
 
 export class LlamaTokenizer extends PreTrainedTokenizer {
-    _default_chat_template = `{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\n' + system_message + '\n<</SYS>>\n\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\n' + content.strip() + '\n<</SYS>>\n\n' }}{% elif message['role'] == 'assistant' %}{{ ' '  + content.strip() + ' ' + eos_token }}{% endif %}{% endfor %}`
 
-    DEFAULT_SYSTEM_PROMPT =
-        "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your " +
-        "answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure " +
-        "that your responses are socially unbiased and positive in nature.\n\n" +
-        "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not " +
-        "correct. If you don't know the answer to a question, please don't share false information."
+    padding_side = 'left';
 
     constructor(tokenizerJSON, tokenizerConfig) {
         super(tokenizerJSON, tokenizerConfig);
-        this.use_default_system_prompt = tokenizerConfig.use_default_system_prompt ?? false;
 
         this.legacy = tokenizerConfig.legacy ?? true;
         if (!this.legacy) {
@@ -3288,14 +3381,8 @@ export class LlamaTokenizer extends PreTrainedTokenizer {
         }
         return tokens;
     }
-
-    get default_chat_template() {
-        return super.default_chat_template
-            .replaceAll('USE_DEFAULT_PROMPT', this.use_default_system_prompt ? 'true' : 'false')
-            .replaceAll('DEFAULT_SYSTEM_MESSAGE', this.DEFAULT_SYSTEM_PROMPT.replaceAll("\n", "\\n").replaceAll("'", "\\'"));
-    }
 }
-export class CodeLlamaTokenizer extends LlamaTokenizer { } // NOTE: `LlamaTokenizer` to get the correct chat template
+export class CodeLlamaTokenizer extends PreTrainedTokenizer { }
 
 export class XLMRobertaTokenizer extends PreTrainedTokenizer { }
 export class MPNetTokenizer extends PreTrainedTokenizer { }
@@ -3308,9 +3395,7 @@ export class EsmTokenizer extends PreTrainedTokenizer { }
 
 export class Qwen2Tokenizer extends PreTrainedTokenizer { }
 
-export class GemmaTokenizer extends PreTrainedTokenizer {
-    _default_chat_template = "{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}"
-}
+export class GemmaTokenizer extends PreTrainedTokenizer { }
 
 export class Grok1Tokenizer extends PreTrainedTokenizer { }
 
@@ -3433,139 +3518,19 @@ export class M2M100Tokenizer extends PreTrainedTokenizer {
     }
 }
 
-
-const WHISPER_LANGUAGES = [
-    ["en", "english"],
-    ["zh", "chinese"],
-    ["de", "german"],
-    ["es", "spanish"],
-    ["ru", "russian"],
-    ["ko", "korean"],
-    ["fr", "french"],
-    ["ja", "japanese"],
-    ["pt", "portuguese"],
-    ["tr", "turkish"],
-    ["pl", "polish"],
-    ["ca", "catalan"],
-    ["nl", "dutch"],
-    ["ar", "arabic"],
-    ["sv", "swedish"],
-    ["it", "italian"],
-    ["id", "indonesian"],
-    ["hi", "hindi"],
-    ["fi", "finnish"],
-    ["vi", "vietnamese"],
-    ["he", "hebrew"],
-    ["uk", "ukrainian"],
-    ["el", "greek"],
-    ["ms", "malay"],
-    ["cs", "czech"],
-    ["ro", "romanian"],
-    ["da", "danish"],
-    ["hu", "hungarian"],
-    ["ta", "tamil"],
-    ["no", "norwegian"],
-    ["th", "thai"],
-    ["ur", "urdu"],
-    ["hr", "croatian"],
-    ["bg", "bulgarian"],
-    ["lt", "lithuanian"],
-    ["la", "latin"],
-    ["mi", "maori"],
-    ["ml", "malayalam"],
-    ["cy", "welsh"],
-    ["sk", "slovak"],
-    ["te", "telugu"],
-    ["fa", "persian"],
-    ["lv", "latvian"],
-    ["bn", "bengali"],
-    ["sr", "serbian"],
-    ["az", "azerbaijani"],
-    ["sl", "slovenian"],
-    ["kn", "kannada"],
-    ["et", "estonian"],
-    ["mk", "macedonian"],
-    ["br", "breton"],
-    ["eu", "basque"],
-    ["is", "icelandic"],
-    ["hy", "armenian"],
-    ["ne", "nepali"],
-    ["mn", "mongolian"],
-    ["bs", "bosnian"],
-    ["kk", "kazakh"],
-    ["sq", "albanian"],
-    ["sw", "swahili"],
-    ["gl", "galician"],
-    ["mr", "marathi"],
-    ["pa", "punjabi"],
-    ["si", "sinhala"],
-    ["km", "khmer"],
-    ["sn", "shona"],
-    ["yo", "yoruba"],
-    ["so", "somali"],
-    ["af", "afrikaans"],
-    ["oc", "occitan"],
-    ["ka", "georgian"],
-    ["be", "belarusian"],
-    ["tg", "tajik"],
-    ["sd", "sindhi"],
-    ["gu", "gujarati"],
-    ["am", "amharic"],
-    ["yi", "yiddish"],
-    ["lo", "lao"],
-    ["uz", "uzbek"],
-    ["fo", "faroese"],
-    ["ht", "haitian creole"],
-    ["ps", "pashto"],
-    ["tk", "turkmen"],
-    ["nn", "nynorsk"],
-    ["mt", "maltese"],
-    ["sa", "sanskrit"],
-    ["lb", "luxembourgish"],
-    ["my", "myanmar"],
-    ["bo", "tibetan"],
-    ["tl", "tagalog"],
-    ["mg", "malagasy"],
-    ["as", "assamese"],
-    ["tt", "tatar"],
-    ["haw", "hawaiian"],
-    ["ln", "lingala"],
-    ["ha", "hausa"],
-    ["ba", "bashkir"],
-    ["jw", "javanese"],
-    ["su", "sundanese"],
-]
-
-// @ts-ignore
-const WHISPER_LANGUAGE_MAPPING = new Map(WHISPER_LANGUAGES);
-// @ts-ignore
-const WHISPER_TO_LANGUAGE_CODE_MAPPING = new Map([
-    ...WHISPER_LANGUAGES.map(([k, v]) => [v, k]),
-    ...[
-        ["burmese", "my"],
-        ["valencian", "ca"],
-        ["flemish", "nl"],
-        ["haitian", "ht"],
-        ["letzeburgesch", "lb"],
-        ["pushto", "ps"],
-        ["panjabi", "pa"],
-        ["moldavian", "ro"],
-        ["moldovan", "ro"],
-        ["sinhalese", "si"],
-        ["castilian", "es"],
-    ]
-]);
-
 /**
  * WhisperTokenizer tokenizer
  * @extends PreTrainedTokenizer
  */
 export class WhisperTokenizer extends PreTrainedTokenizer {
-    _default_chat_template = `{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}`;
+
+    get timestamp_begin() {
+        return this.model.convert_tokens_to_ids(["<|notimestamps|>"])[0] + 1;
+    }
 
     /**
      * Decodes automatic speech recognition (ASR) sequences.
-     * @param {Array<{tokens: number[], token_timestamps?: number[], stride: number[]}>} sequences The sequences to decode.
+     * @param {Array<{tokens: bigint[], token_timestamps?: number[], stride: number[]}>} sequences The sequences to decode.
      * @param {Object} options The options to use for decoding.
      * @returns {Array<string|{chunks?: undefined|Array<{language: string|null, timestamp: Array<number|null>, text: string}>}>} The decoded sequences.
      */
@@ -3609,7 +3574,7 @@ export class WhisperTokenizer extends PreTrainedTokenizer {
         const chunks = [];
         let chunk = new_chunk();
         let time_offset = 0.0;
-        const timestamp_begin = this.model.convert_tokens_to_ids(["<|notimestamps|>"])[0] + 1;
+        const timestamp_begin = this.timestamp_begin;
 
         let previous_tokens = [];
         let previous_token_timestamps = [];
@@ -3647,7 +3612,7 @@ export class WhisperTokenizer extends PreTrainedTokenizer {
 
                 if (stride_right) {
                     for (let i = token_ids.length - 1; i >= 0; --i) {
-                        const token = token_ids[i];
+                        const token = Number(token_ids[i]);
                         if (token >= timestamp_begin) {
                             // There can be several token in the right stride
                             // But the last one is ALWAYS going to be skipped
@@ -3665,7 +3630,7 @@ export class WhisperTokenizer extends PreTrainedTokenizer {
 
             // - all tokens within output
             for (let i = 0; i < token_ids.length; ++i) {
-                const token = token_ids[i];
+                const token = Number(token_ids[i]);
                 // 4 possible states for each token
                 // - 1/ Language code
                 // - 2/ all other special tokens (which we ignore)
@@ -3766,6 +3731,14 @@ export class WhisperTokenizer extends PreTrainedTokenizer {
                         let end_time;
                         if (i + 1 < token_timestamps.length) {
                             end_time = round(token_timestamps[i + 1] + time_offset, 2);
+
+                            // Do not allow punctuation-only tokens to have a duration.
+                            // This prevents long pauses from messing up the timestamps.
+                            const decoded_text = this.decode([token]);
+                            if (PUNCTUATION_ONLY_REGEX.test(decoded_text)) {
+                                // Add `time_precision` to avoid overlapping timestamps
+                                end_time = round(Math.min(start_time + time_precision, end_time), 2);
+                            }
                         } else {
                             // should never happen
                             end_time = null;
@@ -3909,7 +3882,9 @@ export class WhisperTokenizer extends PreTrainedTokenizer {
 
             const rightLength = rightSequence.length;
             for (let j = 1; j < leftLength + rightLength; ++j) {
-                const eps = j / 10000.0;
+                // Slightly convoluted because we don't want out of bound indices
+                // This will be necessary for a small conflict resolution optimization
+                // later
                 const leftStart = Math.max(0, leftLength - j);
                 const leftStop = Math.min(leftLength, leftLength + rightLength - j);
                 const left = leftSequence.slice(leftStart, leftStop);
@@ -3919,7 +3894,21 @@ export class WhisperTokenizer extends PreTrainedTokenizer {
                 if (left.length !== right.length) {
                     throw new Error("There is a bug within whisper `decode_asr` function, please report it. Dropping to prevent bad inference.");
                 }
-                const matches = left.filter((elem, idx) => elem === right[idx]).length;
+
+                let matches;
+                if (use_token_timestamp_sequences) {
+                    // Get length of longest subsequence of tokens that match
+                    // and have timestamps that are in order
+                    matches = left.filter((elem, idx) => (
+                        elem === right[idx]
+                        && left_token_timestamp_sequence[leftStart + idx] <= token_timestamp_sequences[i][rightStart + idx]
+                    )).length;
+                } else {
+                    matches = left.filter((elem, idx) => elem === right[idx]).length;
+                }
+
+                // epsilon to favor long perfect matches
+                const eps = j / 10000.0;
                 const matching = matches / j + eps;
                 if (matches > 1 && matching > max) {
                     max = matching;
@@ -3999,7 +3988,7 @@ export class WhisperTokenizer extends PreTrainedTokenizer {
     ) {
         let text;
         // @ts-ignore
-        if (decode_args && decode_args.decode_with_timestamps) {
+        if (decode_args?.decode_with_timestamps) {
             if (token_ids instanceof Tensor) {
                 token_ids = prepareTensorForDecode(token_ids);
             }
@@ -4015,7 +4004,7 @@ export class WhisperTokenizer extends PreTrainedTokenizer {
     }
 
     /**
-     * @param {number[]} token_ids List of token IDs to decode.
+     * @param {number[]|bigint[]} token_ids List of token IDs to decode.
      * @param {Object} decode_args Optional arguments for decoding
      * @private
      */
@@ -4025,9 +4014,10 @@ export class WhisperTokenizer extends PreTrainedTokenizer {
         const timestamp_begin = Array.from(this.all_special_ids).at(-1) + 1;
         /**@type {Array} */
         let outputs = [[]];
-        for (const token of token_ids) {
+        for (let token of token_ids) {
+            token = Number(token);
             if (token >= timestamp_begin) {
-                const timestamp = round((token - timestamp_begin) * time_precision, 2);
+                const timestamp = ((token - timestamp_begin) * time_precision).toFixed(2);
                 outputs.push(`<|${timestamp}|>`);
                 outputs.push([]);
             } else {
@@ -4035,13 +4025,7 @@ export class WhisperTokenizer extends PreTrainedTokenizer {
             }
         }
         outputs = outputs.map(
-            s => {
-                if (typeof s === 'string') {
-                    return s;
-                } else {
-                    return super.decode(s, decode_args);
-                }
-            }
+            s => typeof s === 'string' ? s : super.decode(s, decode_args)
         )
 
         return outputs.join('');
@@ -4192,105 +4176,6 @@ export class WhisperTokenizer extends PreTrainedTokenizer {
             newIndices.filter(x => x.length > 0),
         ]
     }
-
-    /**
-     * Helper function to build translation inputs for a `WhisperTokenizer`,
-     * depending on the language, task, and whether to predict timestamp tokens.
-     * 
-     * Used to override the prefix tokens appended to the start of the label sequence.
-     * 
-     * **Example: Get ids for a language**
-     * ```javascript
-     * // instantiate the tokenizer and set the prefix token to Spanish
-     * const tokenizer = await WhisperTokenizer.from_pretrained('Xenova/whisper-tiny');
-     * const forced_decoder_ids = tokenizer.get_decoder_prompt_ids({ language: 'spanish' });
-     * // [(1, 50262), (2, 50363)]
-     * ```
-     * 
-     * @param {Object} options Options to generate the decoder prompt.
-     * @param {string} [options.language] The language of the transcription text.
-     * The corresponding language id token is appended to the start of the sequence for multilingual
-     * speech recognition and speech translation tasks, e.g. for "Spanish" the token "<|es|>" is appended
-     * to the start of sequence.
-     * @param {string} [options.task] Task identifier to append at the start of sequence (if any).
-     * This should be used for mulitlingual fine-tuning, with "transcribe" for speech recognition and
-     * "translate" for speech translation.
-     * @param {boolean} [options.no_timestamps] Whether to add the <|notimestamps|> token at the start of the sequence.
-     * @returns {number[][]} The decoder prompt ids.
-     */
-    get_decoder_prompt_ids({
-        language = null,
-        task = null,
-        no_timestamps = true,
-    } = {}) {
-
-        // <|lang_id|> <|task|> <|notimestamps|>
-
-        const forced_decoder_ids = [];
-
-        if (language) {
-            // User wishes to specify the language
-            language = language.toLowerCase();
-
-            // Map to code from user-friendly name (e.g., "english" -> "en")
-            let language_code = WHISPER_TO_LANGUAGE_CODE_MAPPING.get(language);
-
-            if (language_code === undefined) {
-                // User provided something that is not a language name
-
-                if (WHISPER_LANGUAGE_MAPPING.has(language)) {
-                    // User provided the language code directly (e.g., "en")
-                    language_code = language;
-
-                } else {
-                    // User provided something that is not a language code or name
-                    const is_language_code = language.length === 2;
-                    const langs = is_language_code ? WHISPER_LANGUAGE_MAPPING.keys() : WHISPER_LANGUAGE_MAPPING.values();
-
-                    throw new Error(`Language "${language}" is not supported. Must be one of: ${JSON.stringify(langs)}`);
-                }
-            }
-
-            const language_token_id = this.model.tokens_to_ids.get(`<|${language_code}|>`);
-            if (language_token_id === undefined) {
-                throw new Error(`Unable to find language "${language_code}" in model vocabulary. Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.`)
-            }
-
-            forced_decoder_ids.push(language_token_id);
-        } else {
-            // No token will be forced, which leaves the model to predict the language
-            forced_decoder_ids.push(null);
-        }
-
-        if (task) {
-            task = task.toLowerCase();
-            if (task !== 'transcribe' && task !== 'translate') {
-                throw new Error(`Task "${task}" is not supported. Must be one of: ["transcribe", "translate"]`);
-            }
-
-            const task_token_id = this.model.tokens_to_ids.get(`<|${task}|>`);
-            if (task_token_id === undefined) {
-                throw new Error(`Unable to find task "${task}" in model vocabulary. Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.`)
-            }
-
-            forced_decoder_ids.push(task_token_id);
-        } else {
-            // No token will be forced, which leaves the model to predict the task
-            forced_decoder_ids.push(null);
-        }
-
-        if (no_timestamps) {
-            const no_timestamps_id = this.model.tokens_to_ids.get(`<|notimestamps|>`);
-            if (no_timestamps_id === undefined) {
-                throw new Error('Unable to find "<|notimestamps|>" in model vocabulary. Please report this issue at https://github.com/xenova/transformers.js/issues/new/choose.')
-            }
-
-            forced_decoder_ids.push(no_timestamps_id);
-        }
-
-        return forced_decoder_ids.map((x, i) => [i + 1, x]).filter(x => x[1] !== null);
-
-    }
 }
 export class CodeGenTokenizer extends PreTrainedTokenizer { }
 export class CLIPTokenizer extends PreTrainedTokenizer { }
@@ -4351,10 +4236,8 @@ export class MarianTokenizer extends PreTrainedTokenizer {
 
 export class Wav2Vec2CTCTokenizer extends PreTrainedTokenizer { }
 
-export class BlenderbotTokenizer extends PreTrainedTokenizer {
-    _default_chat_template = `{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ '  ' }}{% endif %}{% endfor %}{{ eos_token }}`;
-}
-export class BlenderbotSmallTokenizer extends BlenderbotTokenizer { } // NOTE `BlenderbotTokenizer` to get the correct chat template
+export class BlenderbotTokenizer extends PreTrainedTokenizer { }
+export class BlenderbotSmallTokenizer extends PreTrainedTokenizer { }
 
 export class SpeechT5Tokenizer extends PreTrainedTokenizer { }
 
@@ -4447,7 +4330,6 @@ export class AutoTokenizer {
      * @returns {Promise<PreTrainedTokenizer>} A new instance of the PreTrainedTokenizer class.
      */
     static async from_pretrained(pretrained_model_name_or_path, {
-        quantized = true,
         progress_callback = null,
         config = null,
         cache_dir = null,
@@ -4457,7 +4339,6 @@ export class AutoTokenizer {
     } = {}) {
 
         const [tokenizerJSON, tokenizerConfig] = await loadTokenizer(pretrained_model_name_or_path, {
-            quantized,
             progress_callback,
             config,
             cache_dir,
diff --git a/src/transformers.js b/src/transformers.js
index 9dcd0160c..be7ad176e 100644
--- a/src/transformers.js
+++ b/src/transformers.js
@@ -11,8 +11,8 @@
  * @module transformers
  */
 
+export { env } from './env.js';
 export * from './pipelines.js';
-export * from './env.js';
 export * from './models.js';
 export * from './tokenizers.js';
 export * from './processors.js';
@@ -22,3 +22,7 @@ export * from './utils/audio.js';
 export * from './utils/image.js';
 export * from './utils/tensor.js';
 export * from './utils/maths.js';
+
+export * from './generation/streamers.js';
+export * from './generation/stopping_criteria.js';
+
diff --git a/src/utils/audio.js b/src/utils/audio.js
index 59c2705db..a1b1326df 100644
--- a/src/utils/audio.js
+++ b/src/utils/audio.js
@@ -14,6 +14,7 @@ import { FFT, max } from './maths.js';
 import {
     calculateReflectOffset,
 } from './core.js';
+import { Tensor, matmul } from './tensor.js';
 
 
 /**
@@ -78,28 +79,54 @@ export async function read_audio(url, sampling_rate) {
 }
 
 /**
- * Generates a Hanning window of length M.
- *
- * @param {number} M The length of the Hanning window to generate.
- * @returns {Float64Array} The generated Hanning window.
+ * Helper function to generate windows that are special cases of the generalized cosine window.
+ * See https://www.mathworks.com/help/signal/ug/generalized-cosine-windows.html for more information.
+ * @param {number} M Number of points in the output window. If zero or less, an empty array is returned.
+ * @param {number} a_0 Offset for the generalized cosine window.
+ * @returns {Float64Array} The generated window.
  */
-export function hanning(M) {
+function generalized_cosine_window(M, a_0) {
     if (M < 1) {
         return new Float64Array();
     }
     if (M === 1) {
         return new Float64Array([1]);
     }
-    const denom = M - 1;
-    const factor = Math.PI / denom;
+
+    const a_1 = 1 - a_0;
+    const factor = 2 * Math.PI / (M - 1);
+
     const cos_vals = new Float64Array(M);
     for (let i = 0; i < M; ++i) {
-        const n = 2 * i - denom;
-        cos_vals[i] = 0.5 + 0.5 * Math.cos(factor * n);
+        cos_vals[i] = a_0 - a_1 * Math.cos(i * factor);
     }
     return cos_vals;
 }
 
+/**
+ * Generates a Hanning window of length M.
+ * See https://numpy.org/doc/stable/reference/generated/numpy.hanning.html for more information.
+ *
+ * @param {number} M The length of the Hanning window to generate.
+ * @returns {Float64Array} The generated Hanning window.
+ */
+export function hanning(M) {
+    return generalized_cosine_window(M, 0.5);
+}
+
+
+/**
+ * Generates a Hamming window of length M.
+ * See https://numpy.org/doc/stable/reference/generated/numpy.hamming.html for more information.
+ *
+ * @param {number} M The length of the Hamming window to generate.
+ * @returns {Float64Array} The generated Hamming window.
+ */
+export function hamming(M) {
+    return generalized_cosine_window(M, 0.54);
+}
+
+
 const HERTZ_TO_MEL_MAPPING = {
     "htk": (/** @type {number} */ freq) => 2595.0 * Math.log10(1.0 + (freq / 700.0)),
     "kaldi": (/** @type {number} */ freq) => 1127.0 * Math.log(1.0 + (freq / 700.0)),
@@ -427,11 +454,12 @@ function power_to_db(spectrogram, reference = 1.0, min_value = 1e-10, db_range =
  * @param {boolean} [options.remove_dc_offset=null] Subtract mean from waveform on each frame, applied before pre-emphasis. This should be set to `true` in
  * order to get the same results as `torchaudio.compliance.kaldi.fbank` when computing mel filters.
  * @param {number} [options.max_num_frames=null] If provided, limits the number of frames to compute to this value.
+ * @param {number} [options.min_num_frames=null] If provided, ensures the number of frames to compute is at least this value.
  * @param {boolean} [options.do_pad=true] If `true`, pads the output spectrogram to have `max_num_frames` frames.
  * @param {boolean} [options.transpose=false] If `true`, the returned spectrogram will have shape `(num_frames, num_frequency_bins/num_mel_filters)`. If `false`, the returned spectrogram will have shape `(num_frequency_bins/num_mel_filters, num_frames)`.
- * @returns {{data: Float32Array, dims: number[]}} Spectrogram of shape `(num_frequency_bins, length)` (regular spectrogram) or shape `(num_mel_filters, length)` (mel spectrogram).
+ * @returns {Promise<Tensor>} Spectrogram of shape `(num_frequency_bins, length)` (regular spectrogram) or shape `(num_mel_filters, length)` (mel spectrogram).
  */
-export function spectrogram(
+export async function spectrogram(
     waveform,
     window,
     frame_length,
@@ -452,6 +480,7 @@ export function spectrogram(
         remove_dc_offset = null,
 
         // Custom parameters for efficiency reasons
+        min_num_frames = null,
         max_num_frames = null,
         do_pad = true,
         transpose = false,
@@ -489,8 +518,10 @@ export function spectrogram(
     }
 
     // split waveform into frames of frame_length size
-    const num_frames = Math.floor(1 + Math.floor((waveform.length - frame_length) / hop_length))
-
+    let num_frames = Math.floor(1 + Math.floor((waveform.length - frame_length) / hop_length))
+    if (min_num_frames !== null && num_frames < min_num_frames) {
+        num_frames = min_num_frames
+    }
     const num_frequency_bins = onesided ? Math.floor(fft_length / 2) + 1 : fft_length
 
     let d1 = num_frames;
@@ -511,34 +542,43 @@ export function spectrogram(
     const fft = new FFT(fft_length);
     const inputBuffer = new Float64Array(fft_length);
     const outputBuffer = new Float64Array(fft.outputBufferSize);
-    const magnitudes = new Array(d1);
+    const transposedMagnitudeData = new Float32Array(num_frequency_bins * d1Max);
 
     for (let i = 0; i < d1; ++i) {
         // Populate buffer with waveform data
         const offset = i * hop_length;
-        for (let j = 0; j < frame_length; ++j) {
+        const buffer_size = Math.min(waveform.length - offset, frame_length);
+        if (buffer_size !== frame_length) {
+            // The full buffer is not needed, so we need to reset it (avoid overflow from previous iterations)
+            // NOTE: We don't need to reset the buffer if it's full since we overwrite the first
+            // `frame_length` values and the rest (`fft_length - frame_length`) remains zero.
+            inputBuffer.fill(0, 0, frame_length);
+        }
+
+        for (let j = 0; j < buffer_size; ++j) {
             inputBuffer[j] = waveform[offset + j];
         }
 
         if (remove_dc_offset) {
             let sum = 0;
-            for (let j = 0; j < frame_length; ++j) {
+            for (let j = 0; j < buffer_size; ++j) {
                 sum += inputBuffer[j];
             }
-            const mean = sum / frame_length;
-            for (let j = 0; j < frame_length; ++j) {
+            const mean = sum / buffer_size;
+            for (let j = 0; j < buffer_size; ++j) {
                 inputBuffer[j] -= mean;
             }
         }
 
         if (preemphasis !== null) {
             // Done in reverse to avoid copies and distructive modification
-            for (let j = frame_length - 1; j >= 1; --j) {
+            for (let j = buffer_size - 1; j >= 1; --j) {
                 inputBuffer[j] -= preemphasis * inputBuffer[j - 1];
             }
             inputBuffer[0] *= 1 - preemphasis;
         }
 
+        // Apply window function
         for (let j = 0; j < window.length; ++j) {
             inputBuffer[j] *= window[j];
         }
@@ -546,74 +586,63 @@ export function spectrogram(
         fft.realTransform(outputBuffer, inputBuffer);
 
         // compute magnitudes
-        const row = new Array(num_frequency_bins);
-        for (let j = 0; j < row.length; ++j) {
+        for (let j = 0; j < num_frequency_bins; ++j) {
             const j2 = j << 1;
-            row[j] = outputBuffer[j2] ** 2 + outputBuffer[j2 + 1] ** 2;
+
+            // NOTE: We transpose the data here to avoid doing it later
+            transposedMagnitudeData[j * d1Max + i] = outputBuffer[j2] ** 2 + outputBuffer[j2 + 1] ** 2;
         }
-        magnitudes[i] = row;
     }
 
     if (power !== null && power !== 2) {
         // slight optimization to not sqrt
         const pow = 2 / power; // we use 2 since we already squared
-        for (let i = 0; i < magnitudes.length; ++i) {
-            const magnitude = magnitudes[i];
-            for (let j = 0; j < magnitude.length; ++j) {
-                magnitude[j] **= pow;
-            }
+        for (let i = 0; i < transposedMagnitudeData.length; ++i) {
+            transposedMagnitudeData[i] **= pow;
         }
     }
 
     // TODO: What if `mel_filters` is null?
     const num_mel_filters = mel_filters.length;
 
-    // Only here do we create Float32Array
-    const mel_spec = new Float32Array(num_mel_filters * d1Max);
-
     // Perform matrix muliplication:
     // mel_spec = mel_filters @ magnitudes.T
     //  - mel_filters.shape=(80, 201)
-    //  - magnitudes.shape=(3000, 201) => - magnitudes.T.shape=(201, 3000)
+    //  - magnitudes.shape=(3000, 201) => magnitudes.T.shape=(201, 3000)
     //  - mel_spec.shape=(80, 3000)
-    const dims = transpose ? [d1Max, num_mel_filters] : [num_mel_filters, d1Max];
-    for (let i = 0; i < num_mel_filters; ++i) { // num melfilters (e.g., 80)
-        const filter = mel_filters[i];
-        for (let j = 0; j < d1; ++j) { // num frames (e.g., 3000)
-            const magnitude = magnitudes[j];
-
-            let sum = 0;
-            for (let k = 0; k < num_frequency_bins; ++k) { // num frequency bins (e.g., 201)
-                sum += filter[k] * magnitude[k];
-            }
+    let mel_spec = await matmul(
+        // TODO: Make `mel_filters` a Tensor during initialization
+        new Tensor('float32', mel_filters.flat(), [num_mel_filters, num_frequency_bins]),
+        new Tensor('float32', transposedMagnitudeData, [num_frequency_bins, d1Max]),
+    );
+    if (transpose) {
+        mel_spec = mel_spec.transpose(1, 0);
+    }
 
-            mel_spec[
-                transpose
-                    ? j * num_mel_filters + i
-                    : i * d1 + j
-            ] = Math.max(mel_floor, sum);
-        }
+    const mel_spec_data = /** @type {Float32Array} */(mel_spec.data);
+    for (let i = 0; i < mel_spec_data.length; ++i) {
+        mel_spec_data[i] = Math.max(mel_floor, mel_spec_data[i]);
     }
 
     if (power !== null && log_mel !== null) {
-        const o = Math.min(mel_spec.length, d1 * num_mel_filters);
+        const o = Math.min(mel_spec_data.length, d1 * num_mel_filters);
+        // NOTE: operates in-place
         switch (log_mel) {
             case 'log':
                 for (let i = 0; i < o; ++i) {
-                    mel_spec[i] = Math.log(mel_spec[i]);
+                    mel_spec_data[i] = Math.log(mel_spec_data[i]);
                 }
                 break;
             case 'log10':
                 for (let i = 0; i < o; ++i) {
-                    mel_spec[i] = Math.log10(mel_spec[i]);
+                    mel_spec_data[i] = Math.log10(mel_spec_data[i]);
                 }
                 break;
             case 'dB':
                 if (power === 1.0) {
-                    // NOTE: operates in-place
-                    amplitude_to_db(mel_spec, reference, min_value, db_range);
+                    amplitude_to_db(mel_spec_data, reference, min_value, db_range);
                 } else if (power === 2.0) {
-                    power_to_db(mel_spec, reference, min_value, db_range);
+                    power_to_db(mel_spec_data, reference, min_value, db_range);
                 } else {
                     throw new Error(`Cannot use log_mel option '${log_mel}' with power ${power}`)
                 }
@@ -623,7 +652,7 @@ export function spectrogram(
         }
     }
 
-    return { data: mel_spec, dims };
+    return mel_spec;
 }
 
 /**
@@ -652,6 +681,9 @@ export function window_function(window_length, name, {
         case 'hann_window':
             window = hanning(length);
             break;
+        case 'hamming':
+            window = hamming(length);
+            break;
         case 'povey':
             window = hanning(length).map(x => Math.pow(x, 0.85));
             break;
diff --git a/src/utils/constants.js b/src/utils/constants.js
new file mode 100644
index 000000000..9d0e9ee42
--- /dev/null
+++ b/src/utils/constants.js
@@ -0,0 +1,2 @@
+
+export const GITHUB_ISSUE_URL = 'https://github.com/huggingface/transformers.js/issues/new/choose';
\ No newline at end of file
diff --git a/src/utils/core.js b/src/utils/core.js
index 4ed0f15ef..6a6137dff 100644
--- a/src/utils/core.js
+++ b/src/utils/core.js
@@ -42,40 +42,6 @@ export function escapeRegExp(string) {
     return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
 }
 
-/**
- * A base class for creating callable objects.
- * 
- * @type {new () => {(...args: any[]): any, _call(...args: any[]): any}}
- */
-export const Callable = /** @type {any} */ (class {
-    /**
-    * Creates a new instance of the Callable class.
-    */
-    constructor() {
-        /**
-         * Creates a closure that delegates to a private method '_call' with the given arguments.
-         * @type {any}
-         * @param {...any} args Zero or more arguments to pass to the '_call' method.
-         * @returns {*} The result of calling the '_call' method.
-         */
-        let closure = function (...args) {
-            return closure._call(...args)
-        }
-        return Object.setPrototypeOf(closure, new.target.prototype)
-    }
-
-    /**
-     * This method should be implemented in subclasses to provide the
-     * functionality of the callable object.
-     *
-     * @param {any[]} args
-     * @throws {Error} If the subclass does not implement the `_call` method.
-     */
-    _call(...args) {
-        throw Error('Must implement _call method in subclass')
-    }
-});
-
 /**
  * Check if a value is a typed array.
  * @param {*} val The value to check.
@@ -97,15 +63,6 @@ export function isIntegralNumber(x) {
     return Number.isInteger(x) || typeof x === 'bigint'
 }
 
-/**
- * Check if a value is exists.
- * @param {*} x The value to check.
- * @returns {boolean} True if the value exists, false otherwise.
- */
-export function exists(x) {
-    return x !== undefined && x !== null;
-}
-
 /**
  * Calculates the dimensions of a nested array.
  *
@@ -173,3 +130,32 @@ export function product(...a) {
 export function calculateReflectOffset(i, w) {
     return Math.abs((i + w) % (2 * w) - w);
 }
+
+/**
+ * 
+ * @param {Object} o 
+ * @param {string[]} props 
+ * @returns {Object}
+ */
+export function pick(o, props) {
+    return Object.assign(
+        {},
+        ...props.map((prop) => {
+            if (o[prop] !== undefined) {
+                return { [prop]: o[prop] };
+            }
+        })
+    );
+}
+
+/**
+ * Calculate the length of a string, taking multi-byte characters into account.
+ * This mimics the behavior of Python's `len` function.
+ * @param {string} s The string to calculate the length of. 
+ * @returns {number} The length of the string.
+ */
+export function len(s) {
+    let length = 0;
+    for (const c of s) ++length;
+    return length;
+}
diff --git a/src/utils/data-structures.js b/src/utils/data-structures.js
index dd8a78867..2340d12c0 100644
--- a/src/utils/data-structures.js
+++ b/src/utils/data-structures.js
@@ -22,11 +22,12 @@ export class PriorityQueue {
 
     /**
      * Create a new PriorityQueue.
-     * @param {Function} comparator Comparator function to determine priority. Defaults to a MaxHeap.
+     * @param {function(any, any): boolean} comparator Comparator function to determine priority. Defaults to a MaxHeap.
      */
-    constructor(comparator = (a, b) => a > b) {
+    constructor(comparator = (a, b) => a > b, maxSize = Infinity) {
         this._heap = [];
         this._comparator = comparator;
+        this._maxSize = maxSize;
     }
 
     /**
@@ -68,8 +69,20 @@ export class PriorityQueue {
      */
     extend(values) {
         for (const value of values) {
-            this._heap.push(value);
-            this._siftUp();
+            if (this.size < this._maxSize) {
+                this._heap.push(value);
+                this._siftUp();
+            } else {
+                // Get index of value with the lowest priority
+                const smallest = this._smallest();
+
+                // If the new value has higher priority than the smallest value in the heap
+                // then replace the smallest value with the new value and update the heap
+                if (this._comparator(value, this._heap[smallest])) {
+                    this._heap[smallest] = value;
+                    this._siftUpFrom(smallest);
+                }
+            }
         }
         return this.size;
     }
@@ -160,12 +173,20 @@ export class PriorityQueue {
      * @private
      */
     _siftUp() {
-        let node = this.size - 1;
+        this._siftUpFrom(this.size - 1);
+    }
+
+    /**
+     * Helper function to sift up from a given node.
+     * @param {number} node The index of the node to start sifting up from.
+     */
+    _siftUpFrom(node) {
         while (node > 0 && this._greater(node, this._parent(node))) {
             this._swap(node, this._parent(node));
             node = this._parent(node);
         }
     }
+
     /**
      * Maintain the heap property by updating positions in the heap,
      * starting at the first element and moving down the heap.
@@ -184,6 +205,15 @@ export class PriorityQueue {
             node = maxChild;
         }
     }
+
+    /**
+     * Get the index of the smallest element in the heap. Since we use an array-based heap,
+     * the index can be computed without needing to traverse the heap.
+     * @private
+     */
+    _smallest() {
+        return (2 ** (Math.floor(Math.log2(this.size))) - 1);
+    }
 }
 
 /**
@@ -199,7 +229,7 @@ export class CharTrie {
      * @param {string[]} texts The strings to add to the trie.
      */
     extend(texts) {
-        for (let text of texts) {
+        for (const text of texts) {
             this.push(text);
         }
     }
@@ -210,7 +240,7 @@ export class CharTrie {
      */
     push(text) {
         let node = this.root;
-        for (let ch of text) {
+        for (const ch of text) {
             let child = node.children.get(ch);
             if (child === undefined) {
                 child = CharTrieNode.default();
@@ -228,12 +258,14 @@ export class CharTrie {
      */
     *commonPrefixSearch(text) {
         let node = this.root;
+        if (node === undefined) return;
+
         let prefix = "";
-        for (let i = 0; i < text.length && node !== undefined; ++i) {
-            const ch = text[i];
+        for (const ch of text) {
             prefix += ch;
             node = node.children.get(ch);
-            if (node !== undefined && node.isLeaf) {
+            if (node === undefined) return;
+            if (node.isLeaf) {
                 yield prefix;
             }
         }
@@ -275,8 +307,8 @@ export class TokenLattice {
      * @param {number} eosTokenId The end-of-sequence token ID.
      */
     constructor(sentence, bosTokenId, eosTokenId) {
-        this.sentence = sentence;
-        this.len = sentence.length;
+        this.chars = Array.from(sentence);
+        this.len = this.chars.length;
         this.bosTokenId = bosTokenId;
         this.eosTokenId = eosTokenId;
         this.nodes = [];
@@ -310,7 +342,7 @@ export class TokenLattice {
     /**
      * Implements the Viterbi algorithm to compute the most likely sequence of tokens.
      *
-     * @returns {TokenLatticeNode[]} The array of nodes representing the most likely sequence of tokens.
+     * @returns {TokenLatticeNode[]} The most likely sequence of tokens.
      */
     viterbi() {
         const len = this.len;
@@ -364,11 +396,11 @@ export class TokenLattice {
      * @returns {string} The array of nodes representing the most likely sequence of tokens.
      */
     piece(node) {
-        return this.sentence.slice(node.pos, node.pos + node.length);
+        return this.chars.slice(node.pos, node.pos + node.length).join('');
     }
 
     /**
-     * @returns {Array} The array of nodes representing the most likely sequence of tokens.
+     * @returns {string[]} The most likely sequence of tokens.
      */
     tokens() {
         const nodes = this.viterbi();
@@ -376,7 +408,7 @@ export class TokenLattice {
     }
 
     /**
-     * @returns {Array} The array of nodes representing the most likely sequence of tokens.
+     * @returns {number[]} The most likely sequence of token ids.
      */
     tokenIds() {
         const nodes = this.viterbi();
diff --git a/src/utils/devices.js b/src/utils/devices.js
new file mode 100644
index 000000000..1086b33e4
--- /dev/null
+++ b/src/utils/devices.js
@@ -0,0 +1,22 @@
+
+/**
+ * The list of devices supported by Transformers.js
+ */
+export const DEVICE_TYPES = Object.freeze({
+    auto: 'auto', // Auto-detect based on device and environment
+    gpu: 'gpu', // Auto-detect GPU
+    cpu: 'cpu', // CPU
+    wasm: 'wasm', // WebAssembly
+    webgpu: 'webgpu', // WebGPU
+    cuda: 'cuda', // CUDA
+    dml: 'dml', // DirectML
+
+    webnn: 'webnn', // WebNN (default)
+    'webnn-npu': 'webnn-npu', // WebNN NPU
+    'webnn-gpu': 'webnn-gpu', // WebNN GPU
+    'webnn-cpu': 'webnn-cpu', // WebNN CPU
+});
+
+/**
+ * @typedef {keyof typeof DEVICE_TYPES} DeviceType
+ */
diff --git a/src/utils/dtypes.js b/src/utils/dtypes.js
new file mode 100644
index 000000000..fa6d94be5
--- /dev/null
+++ b/src/utils/dtypes.js
@@ -0,0 +1,60 @@
+import { apis } from "../env.js";
+
+import { DEVICE_TYPES } from "./devices.js";
+
+// TODO: Use the adapter from `env.backends.onnx.webgpu.adapter` to check for `shader-f16` support,
+// when available in https://github.com/microsoft/onnxruntime/pull/19940.
+// For more information, see https://github.com/microsoft/onnxruntime/pull/19857#issuecomment-1999984753
+
+/**
+ * Checks if WebGPU fp16 support is available in the current environment.
+ */
+export const isWebGpuFp16Supported = (function () {
+    /** @type {boolean} */
+    let cachedResult;
+
+    return async function () {
+        if (cachedResult === undefined) {
+            if (!apis.IS_WEBGPU_AVAILABLE) {
+                cachedResult = false;
+            } else {
+                try {
+                    const adapter = await navigator.gpu.requestAdapter();
+                    cachedResult = adapter.features.has('shader-f16');
+                } catch (e) {
+                    cachedResult = false;
+                }
+            }
+        }
+        return cachedResult;
+    };
+})();
+
+export const DATA_TYPES = Object.freeze({
+    fp32: 'fp32',
+    fp16: 'fp16',
+    q8: 'q8',
+    int8: 'int8',
+    uint8: 'uint8',
+    q4: 'q4',
+    bnb4: 'bnb4',
+    q4f16: 'q4f16', // fp16 model with int4 block weight quantization
+});
+/** @typedef {keyof typeof DATA_TYPES} DataType */
+
+export const DEFAULT_DEVICE_DTYPE_MAPPING = Object.freeze({
+    // NOTE: If not specified, will default to fp32
+    [DEVICE_TYPES.wasm]: DATA_TYPES.q8,
+});
+
+/** @type {Record<DataType, string>} */
+export const DEFAULT_DTYPE_SUFFIX_MAPPING = Object.freeze({
+    [DATA_TYPES.fp32]: '',
+    [DATA_TYPES.fp16]: '_fp16',
+    [DATA_TYPES.int8]: '_int8',
+    [DATA_TYPES.uint8]: '_uint8',
+    [DATA_TYPES.q8]: '_quantized',
+    [DATA_TYPES.q4]: '_q4',
+    [DATA_TYPES.q4f16]: '_q4f16',
+    [DATA_TYPES.bnb4]: '_bnb4',
+});
diff --git a/src/utils/generation.js b/src/utils/generation.js
deleted file mode 100644
index 1f9dc898b..000000000
--- a/src/utils/generation.js
+++ /dev/null
@@ -1,873 +0,0 @@
-
-/**
- * @file Classes, functions, and utilities for generation.
- * 
- * @todo Describe how to create a custom `GenerationConfig`.
- * 
- * @module utils/generation
- */
-import { Tensor } from './tensor.js';
-import {
-    Callable,
-    exists,
-} from './core.js';
-import {
-    max,
-    softmax,
-    log_softmax,
-    getTopItems,
-} from './maths.js';
-
-/**
- * A class representing a list of logits processors. A logits processor is a function that modifies the logits
- * output of a language model. This class provides methods for adding new processors and applying all processors to a
- * batch of logits.
- *
- * @extends Callable
- */
-export class LogitsProcessorList extends Callable {
-    /**
-     * Constructs a new instance of `LogitsProcessorList`.
-     */
-    constructor() {
-        super();
-        this.processors = [];
-    }
-
-    /**
-     * Adds a new logits processor to the list.
-     *
-     * @param {LogitsProcessor} item The logits processor function to add.
-     */
-    push(item) {
-        this.processors.push(item);
-    }
-
-    /**
-     * Adds multiple logits processors to the list.
-     *
-     * @param {LogitsProcessor[]} items The logits processor functions to add.
-     */
-    extend(items) {
-        this.processors.push(...items);
-    }
-
-    /**
-     * Applies all logits processors in the list to a batch of logits, modifying them in-place.
-     *
-     * @param {number[]} input_ids The input IDs for the language model.
-     * @param {number[][]} batchedLogits A 2D array of logits, where each row corresponds to a single
-     *                                                input sequence in the batch.
-     */
-    _call(input_ids, batchedLogits) {
-        // NOTE: This is different from the Python code, since vanilla JS does not support vectorized operations. 
-        // As a result, we apply each processor to each item in the batch.
-        for (let logits of batchedLogits) {
-            // Modifies logits inplace
-            this.processors.forEach(
-                func => func(input_ids, logits)
-            )
-        }
-    }
-
-    [Symbol.iterator]() {
-        return this.processors.values();
-    }
-}
-
-/**
- * Base class for processing logits.
- * @extends Callable
- */
-export class LogitsProcessor extends Callable {
-    /**
-     * Apply the processor to the input logits.
-     *
-     * @abstract
-     * @param {Array} input_ids The input ids.
-     * @param {Tensor} logits The logits to process.
-     * @throws {Error} Throws an error if `_call` is not implemented in the subclass.
-     */
-    _call(input_ids, logits) {
-        throw Error("`_call` should be implemented in a subclass")
-    }
-}
-
-/**
- * A logits processor that forces a specific token to be generated by the decoder.
- * 
- * @extends LogitsProcessor
- */
-export class ForceTokensLogitsProcessor extends LogitsProcessor {
-    /**
-     * Constructs a new instance of `ForceTokensLogitsProcessor`.
-     * 
-     * @param {Array} forced_decoder_ids The ids of tokens that should be forced.
-     */
-    constructor(forced_decoder_ids) {
-        super();
-        this.force_token_map = Object.fromEntries(forced_decoder_ids ?? []);
-    }
-
-    /**
-     * Apply the processor to the input logits.
-     *
-     * @param {Array} input_ids The input ids.
-     * @param {Tensor} logits The logits to process.
-     * @returns {Tensor} The processed logits.
-     */
-    _call(input_ids, logits) {
-        let map = this.force_token_map[input_ids.length];
-        if (exists(map)) { // There exists a mapping
-            logits.data.fill(-Infinity)
-            logits.data[map] = 0;
-        }
-        return logits;
-    }
-}
-
-/**
- * A LogitsProcessor that forces a BOS token at the beginning of the generated sequence.
- * @extends LogitsProcessor
- */
-export class ForcedBOSTokenLogitsProcessor extends LogitsProcessor {
-    /**
-     * Create a ForcedBOSTokenLogitsProcessor.
-     * @param {number} bos_token_id The ID of the beginning-of-sequence token to be forced.
-     */
-    constructor(bos_token_id) {
-        super();
-        this.bos_token_id = bos_token_id;
-    }
-
-    /**
-     * Apply the BOS token forcing to the logits.
-     * @param {Array} input_ids The input IDs.
-     * @param {Object} logits The logits.
-     * @returns {Object} The logits with BOS token forcing.
-     */
-    _call(input_ids, logits) {
-        if (input_ids.length === 1) {
-            logits.data.fill(-Infinity)
-            logits.data[this.bos_token_id] = 0;
-        }
-        return logits;
-    }
-}
-
-/**
- * A logits processor that forces end-of-sequence token probability to 1.
- * 
- * @extends LogitsProcessor
- */
-export class ForcedEOSTokenLogitsProcessor extends LogitsProcessor {
-    /**
-     * Create a ForcedEOSTokenLogitsProcessor.
-     * @param {number} max_length Max length of the sequence.
-     * @param {number|number[]} forced_eos_token_id The ID of the end-of-sequence token to be forced.
-     */
-    constructor(max_length, forced_eos_token_id) {
-        super();
-        this.max_length = max_length;
-        this.forced_eos_token_id = forced_eos_token_id;
-    }
-
-    /**
-     * Apply the processor to input_ids and logits.
-     * 
-     * @param {number[]} input_ids The input ids.
-     * @param {Tensor} logits The logits tensor.
-     */
-    _call(input_ids, logits) {
-        // console.log('call ForcedEOSTokenLogitsProcessor')
-        // TODO
-    }
-}
-
-/**
- * A LogitsProcessor that suppresses a list of tokens as soon as the `generate` function starts
- * generating using `begin_index` tokens. This should ensure that the tokens defined by
- * `begin_suppress_tokens` at not sampled at the begining of the generation.
- * @extends LogitsProcessor
- */
-export class SuppressTokensAtBeginLogitsProcessor extends LogitsProcessor {
-    /**
-     * Create a SuppressTokensAtBeginLogitsProcessor.
-     * @param {number[]} begin_suppress_tokens The IDs of the tokens to suppress.
-     * @param {number} begin_index The number of tokens to generate before suppressing tokens.
-     */
-    constructor(begin_suppress_tokens, begin_index) {
-        super();
-        this.begin_suppress_tokens = begin_suppress_tokens;
-        this.begin_index = begin_index;
-    }
-
-    /**
-     * Apply the BOS token forcing to the logits.
-     * @param {Array} input_ids The input IDs.
-     * @param {Object} logits The logits.
-     * @returns {Object} The logits with BOS token forcing.
-     */
-    _call(input_ids, logits) {
-        if (input_ids.length === this.begin_index) {
-            for (let token_id of this.begin_suppress_tokens) {
-                logits.data[token_id] = -Infinity;
-            }
-        }
-        return logits;
-    }
-}
-
-/**
- * A LogitsProcessor that handles adding timestamps to generated text.
- * @extends LogitsProcessor
- */
-export class WhisperTimeStampLogitsProcessor extends LogitsProcessor {
-    /**
-     * Constructs a new WhisperTimeStampLogitsProcessor.
-     * @param {Object} generate_config The config object passed to the `generate()` method of a transformer model.
-     * @param {number} generate_config.eos_token_id The ID of the end-of-sequence token.
-     * @param {number} generate_config.no_timestamps_token_id The ID of the token used to indicate that a token should not have a timestamp.
-     * @param {number[][]} [generate_config.forced_decoder_ids] An array of two-element arrays representing decoder IDs that are forced to appear in the output. The second element of each array indicates whether the token is a timestamp.
-     * @param {number} [generate_config.max_initial_timestamp_index] The maximum index at which an initial timestamp can appear.
-     */
-    constructor(generate_config) {
-        super();
-        this.eos_token_id = generate_config.eos_token_id;
-        this.no_timestamps_token_id = generate_config.no_timestamps_token_id;
-        this.timestamp_begin = this.no_timestamps_token_id + 1;
-
-        this.begin_index = (generate_config.forced_decoder_ids || []).length + 2;
-        if (generate_config.forced_decoder_ids.slice(-1)[0][1] === this.no_timestamps_token_id) {
-            this.begin_index -= 1;
-        }
-        this.max_initial_timestamp_index = generate_config.max_initial_timestamp_index;
-
-    }
-
-    /**
-     * Modify the logits to handle timestamp tokens.
-     * @param {Array} input_ids The input sequence of tokens.
-     * @param {Tensor} logits The logits output by the model.
-     * @returns {Tensor} The modified logits.
-     */
-    _call(input_ids, logits) {
-        const logitsData = /** @type {Float32Array} */(logits.data);
-
-        // suppress <|notimestamps|> which is handled by without_timestamps
-        logitsData[this.no_timestamps_token_id] = -Infinity;
-
-        if (input_ids.length === this.begin_index - 1) {
-            logitsData.fill(-Infinity);
-            logitsData[this.timestamp_begin] = 0;
-            return logits;
-        }
-
-        // timestamps have to appear in pairs, except directly before eos_token; mask logits accordingly
-        const seq = input_ids.slice(this.begin_index);
-        const last_was_timestamp = seq.length >= 1 && seq[seq.length - 1] >= this.timestamp_begin;
-        const penultimate_was_timestamp = seq.length < 2 || seq[seq.length - 2] >= this.timestamp_begin;
-
-        if (last_was_timestamp) {
-            if (penultimate_was_timestamp) { // has to be non-timestamp
-                logitsData.subarray(this.timestamp_begin).fill(-Infinity);
-            } else { // cannot be normal text tokens
-                logitsData.subarray(0, this.eos_token_id).fill(-Infinity);
-            }
-        }
-
-        // apply the `max_initial_timestamp` option
-        if (input_ids.length === this.begin_index && this.max_initial_timestamp_index !== null) {
-            const last_allowed = this.timestamp_begin + this.max_initial_timestamp_index;
-            logitsData.subarray(last_allowed + 1).fill(-Infinity);
-        }
-
-        // if sum of probability over timestamps is above any other token, sample timestamp
-        const logprobs = log_softmax(logitsData);
-        const timestamp_logprob = Math.log(logprobs.subarray(this.timestamp_begin).map(Math.exp).reduce((a, b) => a + b));
-        const max_text_token_logprob = max(logprobs.subarray(0, this.timestamp_begin))[0];
-
-        if (timestamp_logprob > max_text_token_logprob) {
-            logitsData.subarray(0, this.timestamp_begin).fill(-Infinity);
-        }
-
-        return logits;
-    }
-}
-
-/**
- * A logits processor that disallows ngrams of a certain size to be repeated.
- * 
- * @extends LogitsProcessor
- */
-export class NoRepeatNGramLogitsProcessor extends LogitsProcessor {
-    /**
-     * Create a NoRepeatNGramLogitsProcessor.
-     * @param {number} no_repeat_ngram_size The no-repeat-ngram size. All ngrams of this size can only occur once.
-     */
-    constructor(no_repeat_ngram_size) {
-        super();
-        this.no_repeat_ngram_size = no_repeat_ngram_size;
-    }
-
-    /**
-     * Generate n-grams from a sequence of token ids.
-     * @param {number[]} prevInputIds List of previous input ids
-     * @returns {Map<string, number[]>} Map of generated n-grams
-     */
-    getNgrams(prevInputIds) {
-        const curLen = prevInputIds.length;
-
-        /**@type {number[][]} */
-        const ngrams = [];
-        for (let j = 0; j < curLen + 1 - this.no_repeat_ngram_size; ++j) {
-            const ngram = [];
-            for (let k = 0; k < this.no_repeat_ngram_size; ++k) {
-                ngram.push(prevInputIds[j + k]);
-            }
-            ngrams.push(ngram);
-        }
-
-        /** @type {Map<string, number[]>} */
-        const generatedNgram = new Map();
-        for (const ngram of ngrams) {
-            const prevNgram = ngram.slice(0, ngram.length - 1);
-            const prevNgramKey = JSON.stringify(prevNgram);
-            const prevNgramValue = generatedNgram.get(prevNgramKey) ?? [];
-            prevNgramValue.push(ngram[ngram.length - 1]);
-            generatedNgram.set(prevNgramKey, prevNgramValue);
-        }
-        return generatedNgram;
-    }
-
-    /**
-     * Generate n-grams from a sequence of token ids.
-     * @param {Map<string, number[]>} bannedNgrams Map of banned n-grams
-     * @param {number[]} prevInputIds List of previous input ids
-     * @returns {number[]} Map of generated n-grams
-     */
-    getGeneratedNgrams(bannedNgrams, prevInputIds) {
-        const ngramIdx = prevInputIds.slice(prevInputIds.length + 1 - this.no_repeat_ngram_size, prevInputIds.length);
-        const banned = bannedNgrams.get(JSON.stringify(ngramIdx)) ?? [];
-        return banned;
-    }
-
-    /**
-     * Calculate banned n-gram tokens
-     * @param {number[]} prevInputIds List of previous input ids
-     * @returns {number[]} Map of generated n-grams
-     */
-    calcBannedNgramTokens(prevInputIds) {
-        const bannedTokens = [];
-        if (prevInputIds.length + 1 < this.no_repeat_ngram_size) {
-            // return no banned tokens if we haven't generated no_repeat_ngram_size tokens yet
-            return bannedTokens;
-
-        } else {
-            const generatedNgrams = this.getNgrams(prevInputIds);
-            const bannedTokens = this.getGeneratedNgrams(generatedNgrams, prevInputIds);
-            return bannedTokens;
-        }
-    }
-
-    /**
-     * Apply the no-repeat-ngram processor to the logits.
-     * @param {Array} input_ids The input IDs.
-     * @param {Object} logits The logits.
-     * @returns {Object} The logits with no-repeat-ngram processing.
-     */
-    _call(input_ids, logits) {
-        const bannedTokens = this.calcBannedNgramTokens(input_ids);
-
-        for (const token of bannedTokens) {
-            logits.data[token] = -Infinity;
-        }
-        return logits;
-    }
-}
-
-/**
- * A logits processor that penalises repeated output tokens.
- * 
- * @extends LogitsProcessor
- */
-export class RepetitionPenaltyLogitsProcessor extends LogitsProcessor {
-    /**
-     * Create a RepetitionPenaltyLogitsProcessor.
-     * @param {number} penalty The penalty to apply for repeated tokens.
-     */
-    constructor(penalty) {
-        super();
-        this.penalty = penalty;
-    }
-
-    /**
-     * Apply the repetition penalty to the logits.
-     * @param {Array} input_ids The input IDs.
-     * @param {Object} logits The logits.
-     * @returns {Object} The logits with repetition penalty processing.
-     */
-    _call(input_ids, logits) {
-        // Modify the logits corresponding to each element in `input_ids`.
-        // As a consequence, the logits corresponding to tokens that appear
-        // many times in the output will be penalised more.
-        for (const input_id of input_ids) {
-            if (logits.data[input_id] < 0) {
-                logits.data[input_id] *= this.penalty;
-            } else {
-                logits.data[input_id] /= this.penalty;
-            }
-        }
-        return logits
-    }
-}
-
-/**
- * A logits processor that enforces a minimum number of tokens.
- * 
- * @extends LogitsProcessor
- */
-export class MinLengthLogitsProcessor extends LogitsProcessor {
-    /**
-     * Create a MinLengthLogitsProcessor.
-     * @param {number} min_length The minimum length below which the score of `eos_token_id` is set to negative infinity.
-     * @param {number|number[]} eos_token_id The ID/IDs of the end-of-sequence token.
-     */
-    constructor(min_length, eos_token_id) {
-        super();
-        this.min_length = min_length;
-        this.eos_token_id = Array.isArray(eos_token_id) ? eos_token_id : [eos_token_id];
-    }
-
-    /**
-     * Apply logit processor.
-     * @param {Array} input_ids The input IDs.
-     * @param {Object} logits The logits.
-     * @returns {Object} The processed logits.
-     */
-    _call(input_ids, logits) {
-        if (input_ids.length < this.min_length) {
-            for (const eos_token of this.eos_token_id) {
-                logits.data[eos_token] = -Infinity;
-            }
-        }
-
-        return logits
-    }
-}
-
-/**
- * A logits processor that enforces a minimum number of new tokens.
- * 
- * @extends LogitsProcessor
- */
-export class MinNewTokensLengthLogitsProcessor extends LogitsProcessor {
-    /**
-     * Create a MinNewTokensLengthLogitsProcessor.
-     * @param {number} prompt_length_to_skip The input tokens length.
-     * @param {number} min_new_tokens The minimum *new* tokens length below which the score of `eos_token_id` is set to negative infinity.
-     * @param {number|number[]} eos_token_id The ID/IDs of the end-of-sequence token.
-     */
-    constructor(prompt_length_to_skip, min_new_tokens, eos_token_id) {
-        super();
-        this.prompt_length_to_skip = prompt_length_to_skip;
-        this.min_new_tokens = min_new_tokens;
-        this.eos_token_id = Array.isArray(eos_token_id) ? eos_token_id : [eos_token_id];
-    }
-
-    /**
-     * Apply logit processor.
-     * @param {Array} input_ids The input IDs.
-     * @param {Object} logits The logits.
-     * @returns {Object} The processed logits.
-     */
-    _call(input_ids, logits) {
-        const new_tokens_length = input_ids.length - this.prompt_length_to_skip;
-        if (new_tokens_length < this.min_new_tokens) {
-            for (const eos_token of this.eos_token_id) {
-                logits.data[eos_token] = -Infinity;
-            }
-        }
-
-        return logits
-    }
-}
-
-export class NoBadWordsLogitsProcessor extends LogitsProcessor {
-    /**
-     * Create a `NoBadWordsLogitsProcessor`.
-     * @param {number[][]} bad_words_ids List of list of token ids that are not allowed to be generated.
-     * @param {number|number[]} eos_token_id The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
-     */
-    constructor(bad_words_ids, eos_token_id) {
-        super();
-        this.bad_words_ids = bad_words_ids;
-        this.eos_token_id = Array.isArray(eos_token_id) ? eos_token_id : [eos_token_id];
-    }
-
-    /**
-     * Apply logit processor.
-     * @param {Array} input_ids The input IDs.
-     * @param {Object} logits The logits.
-     * @returns {Object} The processed logits.
-     */
-    _call(input_ids, logits) {
-
-        for (const bad_word_ids of this.bad_words_ids) {
-            // Whether to modify the logits of the last token in the bad word id sequence
-            let mark = true;
-
-            // For each bad word in the list, if the current sequence of input ids ends with this sequence (excluding the last),
-            // then we set the logits of the last bad word id to -Infinity.
-            for (let i = 1; i <= bad_word_ids.length - 1 && bad_word_ids.length < input_ids.length; ++i) {
-
-                if (bad_word_ids.at(-i - 1) !== input_ids.at(-i)) {
-                    // We have found a mismatch
-                    mark = false;
-                    break;
-                }
-            }
-            if (mark) {
-                logits.data[bad_word_ids.at(-1)] = -Infinity;
-            }
-        }
-
-        return logits
-    }
-}
-
-/**
- * @typedef {Object} GenerationConfigType The default configuration parameters.
- * @property {number} [max_length=20] The maximum length the generated tokens can have. Corresponds to the length of the input prompt + `max_new_tokens`. Its effect is overridden by `max_new_tokens`, if also set.
- * @property {number} [max_new_tokens=null] The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
- * @property {number} [min_length=0] The minimum length of the sequence to be generated. Corresponds to the length of the input prompt + `min_new_tokens`. Its effect is overridden by `min_new_tokens`, if also set.
- * @property {number} [min_new_tokens=null] The minimum numbers of tokens to generate, ignoring the number of tokens in the prompt.
- * @property {boolean|"never"} [early_stopping=false] Controls the stopping condition for beam-based methods, like beam-search. It accepts the following values:
- * - `true`, where the generation stops as soon as there are `num_beams` complete candidates;
- * - `false`, where an heuristic is applied and the generation stops when is it very unlikely to find better candidates;
- * - `"never"`, where the beam search procedure only stops when there cannot be better candidates (canonical beam search algorithm).
- * @property {number} [max_time=null] The maximum amount of time you allow the computation to run for in seconds. Generation will still finish the current pass after allocated time has been passed.
- *
- * @property {boolean} [do_sample=false] Whether or not to use sampling; use greedy decoding otherwise.
- * @property {number} [num_beams=1] Number of beams for beam search. 1 means no beam search.
- * @property {number} [num_beam_groups=1] Number of groups to divide `num_beams` into in order to ensure diversity among different groups of beams. See [this paper](https://arxiv.org/pdf/1610.02424.pdf) for more details.
- * @property {number} [penalty_alpha=null] The values balance the model confidence and the degeneration penalty in contrastive search decoding.
- * @property {boolean} [use_cache=true] Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding.
- *
- * @property {number} [temperature=1.0] The value used to modulate the next token probabilities.
- * @property {number} [top_k=50] The number of highest probability vocabulary tokens to keep for top-k-filtering.
- * @property {number} [top_p=1.0] If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or higher are kept for generation.
- * @property {number} [typical_p=1.0] Local typicality measures how similar the conditional probability of predicting a target token next is to the expected conditional probability of predicting a random token next, given the partial text already generated. If set to float < 1, the smallest set of the most locally typical tokens with probabilities that add up to `typical_p` or higher are kept for generation. See [this paper](https://arxiv.org/pdf/2202.00666.pdf) for more details.
- * @property {number} [epsilon_cutoff=0.0] If set to float strictly between 0 and 1, only tokens with a conditional probability greater than `epsilon_cutoff` will be sampled. In the paper, suggested values range from 3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://arxiv.org/abs/2210.15191) for more details.
- * @property {number} [eta_cutoff=0.0] Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to float strictly between 0 and 1, a token is only considered if it is greater than either `eta_cutoff` or `sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits)))`. The latter term is intuitively the expected next token probability, scaled by `sqrt(eta_cutoff)`. In the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model. See [Truncation Sampling as Language Model Desmoothing](https://arxiv.org/abs/2210.15191) for more details.
- * @property {number} [diversity_penalty=0.0] This value is subtracted from a beam's score if it generates a token same as any beam from other group at a particular time. Note that `diversity_penalty` is only effective if `group beam search` is enabled.
- * @property {number} [repetition_penalty=1.0] The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
- * @property {number} [encoder_repetition_penalty=1.0] The paramater for encoder_repetition_penalty. An exponential penalty on sequences that are not in the original input. 1.0 means no penalty.
- * @property {number} [length_penalty=1.0] Exponential penalty to the length that is used with beam-based generation. It is applied as an exponent to the sequence length, which in turn is used to divide the score of the sequence. Since the score is the log likelihood of the sequence (i.e. negative), `length_penalty` > 0.0 promotes longer sequences, while `length_penalty` < 0.0 encourages shorter sequences.
- * @property {number} [no_repeat_ngram_size=0] If set to int > 0, all ngrams of that size can only occur once.
- * @property {number[][]} [bad_words_ids=null] List of token ids that are not allowed to be generated. In order to get the token ids of the words that should not appear in the generated text, use `(await tokenizer(bad_words, {add_prefix_space: true, add_special_tokens: false})).input_ids`.
- * @property {number[][]|number[][][]} [force_words_ids=null] List of token ids that must be generated. If given a `number[][]`, this is treated as a simple list of words that must be included, the opposite to `bad_words_ids`. If given `number[][][]`, this triggers a [disjunctive constraint](https://github.com/huggingface/transformers/issues/14081), where one can allow different forms of each word.
- * @property {boolean} [renormalize_logits=false] Whether to renormalize the logits after applying all the logits processors or warpers (including the custom ones). It's highly recommended to set this flag to `true` as the search algorithms suppose the score logits are normalized but some logit processors or warpers break the normalization.
- * @property {Object[]} [constraints=null] Custom constraints that can be added to the generation to ensure that the output will contain the use of certain tokens as defined by `Constraint` objects, in the most sensible way possible.
- * 
- * @property {number} [forced_bos_token_id=null] The id of the token to force as the first generated token after the `decoder_start_token_id`. Useful for multilingual models like mBART where the first generated token needs to be the target language token.
- * @property {number|number[]} [forced_eos_token_id=null] The id of the token to force as the last generated token when `max_length` is reached. Optionally, use a list to set multiple *end-of-sequence* tokens.
- * @property {boolean} [remove_invalid_values=false] Whether to remove possible *nan* and *inf* outputs of the model to prevent the generation method to crash. Note that using `remove_invalid_values` can slow down generation.
- * @property {number[]} [exponential_decay_length_penalty=null] This Tuple adds an exponentially increasing length penalty, after a certain amount of tokens have been generated. The tuple shall consist of: `(start_index, decay_factor)` where `start_index` indicates where penalty starts and `decay_factor` represents the factor of exponential decay.
- * @property {number[]} [suppress_tokens=null] A list of tokens that will be suppressed at generation. The `SupressTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
- * @property {number[]} [begin_suppress_tokens=null] A list of tokens that will be suppressed at the beginning of the generation. The `SupressBeginTokens` logit processor will set their log probs to `-inf` so that they are not sampled.
- * @property {number[][]} [forced_decoder_ids=null] A list of pairs of integers which indicates a mapping from generation indices to token indices that will be forced before sampling. For example, `[[1, 123]]` means the second generated token will always be a token of index 123.
- * 
- * @property {number} [num_return_sequences=1] The number of independently computed returned sequences for each element in the batch.
- * @property {boolean} [output_attentions=false] Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned tensors for more details.
- * @property {boolean} [output_hidden_states=false] Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for more details.
- * @property {boolean} [output_scores=false] Whether or not to return the prediction scores. See `scores` under returned tensors for more details.
- * @property {boolean} [return_dict_in_generate=false] Whether or not to return a `ModelOutput` instead of a plain tuple.
- * 
- * @property {number} [pad_token_id=null] The id of the *padding* token.
- * @property {number} [bos_token_id=null] The id of the *beginning-of-sequence* token.
- * @property {number|number[]} [eos_token_id=null] The id of the *end-of-sequence* token. Optionally, use a list to set multiple *end-of-sequence* tokens.
- * 
- * @property {number} [encoder_no_repeat_ngram_size=0] If set to int > 0, all ngrams of that size that occur in the `encoder_input_ids` cannot occur in the `decoder_input_ids`.
- * @property {number} [decoder_start_token_id=null] If an encoder-decoder model starts decoding with a different token than *bos*, the id of that token.
- * 
- * @property {Object} [generation_kwargs={}] Additional generation kwargs will be forwarded to the `generate` function of the model. Kwargs that are not present in `generate`'s signature will be used in the model forward pass.
- */
-
-/**
- * Class that holds a configuration for a generation task.
- * @type {new (kwargs?: GenerationConfigType) => GenerationConfigType}
- */
-export const GenerationConfig = /** @type {any} */ (class {
-
-    /**
-     * Create a new GenerationConfig object.
-     * @param {GenerationConfigType} kwargs 
-     */
-    constructor(kwargs = {}) {
-        // Parameters that control the length of the output
-        this.max_length = kwargs.max_length ?? 20;
-        this.max_new_tokens = kwargs.max_new_tokens ?? null;
-        this.min_length = kwargs.min_length ?? 0;
-        this.min_new_tokens = kwargs.min_new_tokens ?? null;
-        this.early_stopping = kwargs.early_stopping ?? false;
-        this.max_time = kwargs.max_time ?? null;
-
-        // Parameters that control the generation strategy used
-        this.do_sample = kwargs.do_sample ?? false;
-        this.num_beams = kwargs.num_beams ?? 1;
-        this.num_beam_groups = kwargs.num_beam_groups ?? 1;
-        this.penalty_alpha = kwargs.penalty_alpha ?? null;
-        this.use_cache = kwargs.use_cache ?? true;
-
-        // Parameters for manipulation of the model output logits
-        this.temperature = kwargs.temperature ?? 1.0;
-        this.top_k = kwargs.top_k ?? 50;
-        this.top_p = kwargs.top_p ?? 1.0;
-        this.typical_p = kwargs.typical_p ?? 1.0;
-        this.epsilon_cutoff = kwargs.epsilon_cutoff ?? 0.0;
-        this.eta_cutoff = kwargs.eta_cutoff ?? 0.0;
-        this.diversity_penalty = kwargs.diversity_penalty ?? 0.0;
-        this.repetition_penalty = kwargs.repetition_penalty ?? 1.0;
-        this.encoder_repetition_penalty = kwargs.encoder_repetition_penalty ?? 1.0;
-        this.length_penalty = kwargs.length_penalty ?? 1.0;
-        this.no_repeat_ngram_size = kwargs.no_repeat_ngram_size ?? 0;
-        this.bad_words_ids = kwargs.bad_words_ids ?? null;
-        this.force_words_ids = kwargs.force_words_ids ?? null;
-        this.renormalize_logits = kwargs.renormalize_logits ?? false;
-        this.constraints = kwargs.constraints ?? null;
-        this.forced_bos_token_id = kwargs.forced_bos_token_id ?? null;
-        this.forced_eos_token_id = kwargs.forced_eos_token_id ?? null;
-        this.remove_invalid_values = kwargs.remove_invalid_values ?? false;
-        this.exponential_decay_length_penalty = kwargs.exponential_decay_length_penalty ?? null;
-        this.suppress_tokens = kwargs.suppress_tokens ?? null;
-        this.begin_suppress_tokens = kwargs.begin_suppress_tokens ?? null;
-        this.forced_decoder_ids = kwargs.forced_decoder_ids ?? null;
-
-        // Parameters that define the output variables of `generate`
-        this.num_return_sequences = kwargs.num_return_sequences ?? 1;
-        this.output_attentions = kwargs.output_attentions ?? false;
-        this.output_hidden_states = kwargs.output_hidden_states ?? false;
-        this.output_scores = kwargs.output_scores ?? false;
-        this.return_dict_in_generate = kwargs.return_dict_in_generate ?? false;
-
-        // Special tokens that can be used at generation time
-        this.pad_token_id = kwargs.pad_token_id ?? null;
-        this.bos_token_id = kwargs.bos_token_id ?? null;
-        this.eos_token_id = kwargs.eos_token_id ?? null;
-
-        // Generation parameters exclusive to encoder-decoder models
-        this.encoder_no_repeat_ngram_size = kwargs.encoder_no_repeat_ngram_size ?? 0;
-        this.decoder_start_token_id = kwargs.decoder_start_token_id ?? null;
-
-        // Wild card
-        this.generation_kwargs = kwargs.generation_kwargs ?? {};
-    }
-});
-
-/**
- * Sampler is a base class for all sampling methods used for text generation.
- */
-export class Sampler extends Callable {
-    /**
-     * Creates a new Sampler object with the specified generation config.
-     * @param {GenerationConfigType} generation_config The generation config.
-     */
-    constructor(generation_config) {
-        super();
-        this.generation_config = generation_config;
-    }
-
-    /**
-     * Executes the sampler, using the specified logits.
-     * @param {Tensor} logits
-     * @param {number} index
-     * @returns {void}
-     */
-    _call(logits, index = -1) {
-        // Sample from logits, of dims [batch, sequence_length, vocab_size].
-        // If index is specified, sample from [batch, index, vocab_size].
-        return this.sample(logits, index);
-    }
-
-    /**
-     * Abstract method for sampling the logits.
-     * @param {Tensor} logits
-     * @param {number} index
-     * @throws {Error}
-     */
-    sample(logits, index) {
-        throw Error("sample should be implemented in subclasses.")
-    }
-
-    /**
-     * Returns the specified logits as an array, with temperature applied.
-     * @param {Tensor} logits
-     * @param {number} index
-     * @returns {Float32Array}
-     */
-    getLogits(logits, index) {
-        let vocabSize = logits.dims.at(-1);
-
-        let logs = /** @type {Float32Array} */(logits.data);
-
-        if (index === -1) {
-            logs = logs.slice(-vocabSize);
-        } else {
-            let startIndex = index * vocabSize;
-            logs = logs.slice(startIndex, startIndex + vocabSize);
-        }
-
-        // add temperature
-        if (this.generation_config.temperature > 0) {
-            logs = logs.map(x => x / this.generation_config.temperature)
-        }
-        return logs;
-    }
-
-    /**
-     * Selects an item randomly based on the specified probabilities.
-     * @param {Array} probabilities An array of probabilities to use for selection.
-     * @returns {number} The index of the selected item.
-     */
-    randomSelect(probabilities) {
-        // Return index of chosen item
-        let sumProbabilities = probabilities.reduce((acc, curr) => acc + curr, 0);
-
-        let r = Math.random() * sumProbabilities;
-        for (let i = 0; i < probabilities.length; ++i) {
-            r -= probabilities[i];
-            if (r <= 0) {
-                return i;
-            }
-        }
-        return 0; // return first (most probable) as a fallback
-    }
-
-    /**
-     * Returns a Sampler object based on the specified options.
-     * @param {GenerationConfigType} generation_config An object containing options for the sampler.
-     * @returns {Sampler} A Sampler object.
-     */
-    static getSampler(generation_config) {
-        // - *greedy decoding*: `num_beams=1` and `do_sample=False`
-        // - *contrastive search*: `penalty_alpha>0` and `top_k>1`
-        // - *multinomial sampling*: `num_beams=1` and `do_sample=True`
-        // - *beam-search decoding*: `num_beams>1` and `do_sample=False`
-        // - *beam-search multinomial sampling*: `num_beams>1` and `do_sample=True`
-        // - *diverse beam-search decoding*: `num_beams>1` and `num_beam_groups>1`
-        // - *constrained beam-search decoding*: `constraints!=None` or `force_words_ids!=None`
-
-        // NOTE: beam search is implemented directly into the generation function
-        if (generation_config.do_sample) {
-            return new MultinomialSampler(generation_config);
-
-        } else if (generation_config.num_beams > 1) {
-            return new BeamSearchSampler(generation_config);
-
-        } else {
-            if (generation_config.num_return_sequences > 1) {
-                throw Error(`num_return_sequences has to be 1 when doing greedy search, but is ${generation_config.num_return_sequences}.`)
-            }
-            return new GreedySampler(generation_config);
-        }
-    }
-}
-
-/**
- * Class representing a Greedy Sampler.
- * @extends Sampler
- */
-class GreedySampler extends Sampler {
-    /**
-     * Sample the maximum probability of a given logits tensor.
-     * @param {Tensor} logits
-     * @param {number} [index=-1]
-     * @returns {Array} An array with a single tuple, containing the index of the maximum value and a meaningless score (since this is a greedy search).
-     */
-    sample(logits, index = -1) {
-        // NOTE: no need to do log_softmax here since we only take the maximum
-        let logs = this.getLogits(logits, index);
-        let argmax = max(logs)[1];
-
-        // Note: score is meaningless in this context, since we are performing
-        // greedy search (p = 1 => log(p) = 0)
-        return [
-            [argmax, 0]
-        ];
-    }
-}
-
-/**
- * Class representing a MultinomialSampler.
- * @extends Sampler
- */
-class MultinomialSampler extends Sampler {
-
-    /**
-     * Sample from the logits.
-     * @param {Tensor} logits
-     * @param {number} index
-     * @returns {Array}
-     */
-    sample(logits, index = -1) {
-        let k = logits.dims.at(-1); // defaults to vocab size
-        if (this.generation_config.top_k > 0) {
-            k = Math.min(this.generation_config.top_k, k);
-        }
-
-        // Get logits of nth token
-        const logs = this.getLogits(logits, index);
-
-        // Get top k tokens
-        const topLogits = getTopItems(logs, k);
-
-        // Compute softmax over logits
-        const probabilities = softmax(topLogits.map(x => x[1]));
-
-        return Array.from({ length: this.generation_config.num_beams }, () => {
-            const sampledIndex = this.randomSelect(probabilities);
-            return [
-                topLogits[sampledIndex][0], // token id
-                Math.log(probabilities[sampledIndex]), // score
-            ];
-        });
-    }
-}
-
-
-/**
- * Class representing a BeamSearchSampler.
- * @extends Sampler
- */
-class BeamSearchSampler extends Sampler {
-
-    /**
-     * Sample from the logits.
-     * @param {Tensor} logits
-     * @param {number} index
-     * @returns {Array}
-     */
-    sample(logits, index = -1) {
-        let k = logits.dims.at(-1); // defaults to vocab size
-        if (this.generation_config.top_k > 0) {
-            k = Math.min(this.generation_config.top_k, k);
-        }
-
-        // Get logits of nth token
-        const logs = this.getLogits(logits, index);
-
-        // Get top k tokens
-        const topLogits = getTopItems(logs, k);
-
-        // Compute softmax over logits
-        const probabilities = softmax(topLogits.map(x => x[1]));
-
-        return Array.from({ length: this.generation_config.num_beams }, (_, i) => {
-            return [
-                topLogits[i][0], // token id
-                Math.log(probabilities[i]), // score
-            ];
-        });
-    }
-}
diff --git a/src/utils/generic.js b/src/utils/generic.js
new file mode 100644
index 000000000..5ccd467ad
--- /dev/null
+++ b/src/utils/generic.js
@@ -0,0 +1,35 @@
+
+/**
+ * A base class for creating callable objects.
+ * See [here](https://stackoverflow.com/q/76073890) for more information.
+ * 
+ * @type {new () => {(...args: any[]): any, _call(...args: any[]): any}}
+ */
+export const Callable = /** @type {any} */ (class {
+    /**
+    * Creates a new instance of the Callable class.
+    */
+    constructor() {
+        /**
+         * Creates a closure that delegates to a private method '_call' with the given arguments.
+         * @type {any}
+         * @param {...any} args Zero or more arguments to pass to the '_call' method.
+         * @returns {*} The result of calling the '_call' method.
+         */
+        let closure = function (...args) {
+            return closure._call(...args)
+        }
+        return Object.setPrototypeOf(closure, new.target.prototype)
+    }
+
+    /**
+     * This method should be implemented in subclasses to provide the
+     * functionality of the callable object.
+     *
+     * @param {any[]} args
+     * @throws {Error} If the subclass does not implement the `_call` method.
+     */
+    _call(...args) {
+        throw Error('Must implement _call method in subclass')
+    }
+});
diff --git a/src/utils/hub.js b/src/utils/hub.js
old mode 100644
new mode 100755
index 32cab6c5b..71c20c861
--- a/src/utils/hub.js
+++ b/src/utils/hub.js
@@ -13,9 +13,8 @@ import { dispatchCallback } from './core.js';
 
 /**
  * @typedef {Object} PretrainedOptions Options for loading a pretrained model.     
- * @property {boolean?} [quantized=true] Whether to load the 8-bit quantized version of the model (only applicable when loading model files).
  * @property {function} [progress_callback=null] If specified, this function will be called during model construction, to provide the user with progress updates.
- * @property {Object} [config=null] Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:
+ * @property {import('../configs.js').PretrainedConfig} [config=null] Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:
  * - The model is a model provided by the library (loaded with the *model id* string of a pretrained model).
  * - The model is loaded by supplying a local directory as `pretrained_model_name_or_path` and a configuration JSON file named *config.json* is found in the directory.
  * @property {string} [cache_dir=null] Path to a directory in which a downloaded pretrained model configuration should be cached if the standard cache should not be used.
@@ -23,24 +22,39 @@ import { dispatchCallback } from './core.js';
  * @property {string} [revision='main'] The specific model version to use. It can be a branch name, a tag name, or a commit id,
  * since we use a git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
  * NOTE: This setting is ignored for local requests.
+ */
+
+/**
+ * @typedef {Object} ModelSpecificPretrainedOptions Options for loading a pretrained model.
+ * @property {string} [subfolder='onnx'] In case the relevant files are located inside a subfolder of the model repo on huggingface.co,
+ * you can specify the folder name here.
  * @property {string} [model_file_name=null] If specified, load the model with this name (excluding the .onnx suffix). Currently only valid for encoder- or decoder-only models.
+ * @property {import("./devices.js").DeviceType|Record<string, import("./devices.js").DeviceType>} [device=null] The device to run the model on. If not specified, the device will be chosen from the environment settings.
+ * @property {import("./dtypes.js").DataType|Record<string, import("./dtypes.js").DataType>} [dtype=null] The data type to use for the model. If not specified, the data type will be chosen from the environment settings.
+ * @property {boolean|Record<string, boolean>} [use_external_data_format=false] Whether to load the model using the external data format (used for models >= 2GB in size).
+ * @property {import('onnxruntime-common').InferenceSession.SessionOptions} [session_options] (Optional) User-specified session options passed to the runtime. If not provided, suitable defaults will be chosen.
  */
 
+/**
+ * @typedef {PretrainedOptions & ModelSpecificPretrainedOptions} PretrainedModelOptions Options for loading a pretrained model.
+ */
+
+/**
+ * Mapping from file extensions to MIME types.
+ */
+const CONTENT_TYPE_MAP = {
+    'txt': 'text/plain',
+    'html': 'text/html',
+    'css': 'text/css',
+    'js': 'text/javascript',
+    'json': 'application/json',
+    'png': 'image/png',
+    'jpg': 'image/jpeg',
+    'jpeg': 'image/jpeg',
+    'gif': 'image/gif',
+}
 class FileResponse {
-    /**
-     * Mapping from file extensions to MIME types.
-     */
-    _CONTENT_TYPE_MAP = {
-        'txt': 'text/plain',
-        'html': 'text/html',
-        'css': 'text/css',
-        'js': 'text/javascript',
-        'json': 'application/json',
-        'png': 'image/png',
-        'jpg': 'image/jpeg',
-        'jpeg': 'image/jpeg',
-        'gif': 'image/gif',
-    }
+
     /**
      * Creates a new `FileResponse` object.
      * @param {string|URL} filePath
@@ -83,7 +97,7 @@ class FileResponse {
     updateContentType() {
         // Set content-type header based on file extension
         const extension = this.filePath.toString().split('.').pop().toLowerCase();
-        this.headers.set('content-type', this._CONTENT_TYPE_MAP[extension] ?? 'application/octet-stream');
+        this.headers.set('content-type', CONTENT_TYPE_MAP[extension] ?? 'application/octet-stream');
     }
 
     /**
@@ -323,7 +337,7 @@ async function tryCache(cache, ...names) {
  * @param {PretrainedOptions} [options] An object containing optional parameters.
  * 
  * @throws Will throw an error if the file is not found and `fatal` is true.
- * @returns {Promise} A Promise that resolves with the file content as a buffer.
+ * @returns {Promise<Uint8Array>} A Promise that resolves with the file content as a buffer.
  */
 export async function getModelFile(path_or_repo_id, filename, fatal = true, options = {}) {
 
diff --git a/src/utils/image.js b/src/utils/image.js
index 89bb2481b..33bdf11d8 100644
--- a/src/utils/image.js
+++ b/src/utils/image.js
@@ -39,7 +39,7 @@ if (BROWSER_ENV) {
         const metadata = await img.metadata();
         const rawChannels = metadata.channels;
 
-        let { data, info } = await img.rotate().raw().toBuffer({ resolveWithObject: true });
+        const { data, info } = await img.rotate().raw().toBuffer({ resolveWithObject: true });
 
         const newImage = new RawImage(new Uint8ClampedArray(data), info.width, info.height, info.channels);
         if (rawChannels !== undefined && rawChannels !== info.channels) {
@@ -125,6 +125,20 @@ export class RawImage {
         }
     }
 
+    /**
+     * Read an image from a canvas.
+     * @param {HTMLCanvasElement|OffscreenCanvas} canvas The canvas to read the image from.
+     * @returns {RawImage} The image object.
+     */
+    static fromCanvas(canvas) {
+        if (!BROWSER_ENV) {
+            throw new Error('fromCanvas() is only supported in browser environments.')
+        }
+
+        const ctx = canvas.getContext('2d');
+        const data = ctx.getImageData(0, 0, canvas.width, canvas.height).data;
+        return new RawImage(data, canvas.width, canvas.height, 4);
+    }
 
     /**
      * Read an image from a URL or file path.
@@ -132,11 +146,11 @@ export class RawImage {
      * @returns {Promise<RawImage>} The image object.
      */
     static async fromURL(url) {
-        let response = await getFile(url);
+        const response = await getFile(url);
         if (response.status !== 200) {
             throw new Error(`Unable to read image from "${url}" (${response.status} ${response.statusText})`);
         }
-        let blob = await response.blob();
+        const blob = await response.blob();
         return this.fromBlob(blob);
     }
 
@@ -148,7 +162,7 @@ export class RawImage {
     static async fromBlob(blob) {
         if (BROWSER_ENV) {
             // Running in environment with canvas
-            let img = await loadImageFunction(blob);
+            const img = await loadImageFunction(blob);
 
             const ctx = createCanvasFunction(img.width, img.height).getContext('2d');
 
@@ -159,7 +173,7 @@ export class RawImage {
 
         } else {
             // Use sharp.js to read (and possible resize) the image.
-            let img = sharp(await blob.arrayBuffer());
+            const img = sharp(await blob.arrayBuffer());
 
             return await loadImageFunction(img);
         }
@@ -204,7 +218,7 @@ export class RawImage {
             return this;
         }
 
-        let newData = new Uint8ClampedArray(this.width * this.height * 1);
+        const newData = new Uint8ClampedArray(this.width * this.height * 1);
         switch (this.channels) {
             case 3: // rgb to grayscale
             case 4: // rgba to grayscale
@@ -231,7 +245,7 @@ export class RawImage {
             return this;
         }
 
-        let newData = new Uint8ClampedArray(this.width * this.height * 3);
+        const newData = new Uint8ClampedArray(this.width * this.height * 3);
 
         switch (this.channels) {
             case 1: // grayscale to rgb
@@ -264,7 +278,7 @@ export class RawImage {
             return this;
         }
 
-        let newData = new Uint8ClampedArray(this.width * this.height * 4);
+        const newData = new Uint8ClampedArray(this.width * this.height * 4);
 
         switch (this.channels) {
             case 1: // grayscale to rgba
@@ -309,10 +323,10 @@ export class RawImage {
             // TODO use `resample` in browser environment
 
             // Store number of channels before resizing
-            let numChannels = this.channels;
+            const numChannels = this.channels;
 
             // Create canvas object for this image
-            let canvas = this.toCanvas();
+            const canvas = this.toCanvas();
 
             // Actually perform resizing using the canvas API
             const ctx = createCanvasFunction(width, height).getContext('2d');
@@ -321,7 +335,7 @@ export class RawImage {
             ctx.drawImage(canvas, 0, 0, width, height);
 
             // Create image from the resized data
-            let resizedImage = new RawImage(ctx.getImageData(0, 0, width, height).data, width, height, 4);
+            const resizedImage = new RawImage(ctx.getImageData(0, 0, width, height).data, width, height, 4);
 
             // Convert back so that image has the same number of channels as before
             return resizedImage.convert(numChannels);
@@ -380,13 +394,13 @@ export class RawImage {
 
         if (BROWSER_ENV) {
             // Store number of channels before padding
-            let numChannels = this.channels;
+            const numChannels = this.channels;
 
             // Create canvas object for this image
-            let canvas = this.toCanvas();
+            const canvas = this.toCanvas();
 
-            let newWidth = this.width + left + right;
-            let newHeight = this.height + top + bottom;
+            const newWidth = this.width + left + right;
+            const newHeight = this.height + top + bottom;
 
             // Create a new canvas of the desired size.
             const ctx = createCanvasFunction(newWidth, newHeight).getContext('2d');
@@ -398,7 +412,7 @@ export class RawImage {
             );
 
             // Create image from the padded data
-            let paddedImage = new RawImage(
+            const paddedImage = new RawImage(
                 ctx.getImageData(0, 0, newWidth, newHeight).data,
                 newWidth, newHeight, 4);
 
@@ -406,7 +420,7 @@ export class RawImage {
             return paddedImage.convert(numChannels);
 
         } else {
-            let img = this.toSharp().extend({ left, right, top, bottom });
+            const img = this.toSharp().extend({ left, right, top, bottom });
             return await loadImageFunction(img);
         }
     }
@@ -470,16 +484,16 @@ export class RawImage {
         }
 
         // Determine bounds of the image in the new canvas
-        let width_offset = (this.width - crop_width) / 2;
-        let height_offset = (this.height - crop_height) / 2;
+        const width_offset = (this.width - crop_width) / 2;
+        const height_offset = (this.height - crop_height) / 2;
 
 
         if (BROWSER_ENV) {
             // Store number of channels before resizing
-            let numChannels = this.channels;
+            const numChannels = this.channels;
 
             // Create canvas object for this image
-            let canvas = this.toCanvas();
+            const canvas = this.toCanvas();
 
             // Create a new canvas of the desired size. This is needed since if the 
             // image is too small, we need to pad it with black pixels.
@@ -509,7 +523,7 @@ export class RawImage {
             );
 
             // Create image from the resized data
-            let resizedImage = new RawImage(ctx.getImageData(0, 0, crop_width, crop_height).data, crop_width, crop_height, 4);
+            const resizedImage = new RawImage(ctx.getImageData(0, 0, crop_width, crop_height).data, crop_width, crop_height, 4);
 
             // Convert back so that image has the same number of channels as before
             return resizedImage.convert(numChannels);
@@ -529,8 +543,8 @@ export class RawImage {
             } else if (width_offset <= 0 && height_offset <= 0) {
                 // Cropped image lies entirely outside the original image,
                 // so we add padding
-                let top = Math.floor(-height_offset);
-                let left = Math.floor(-width_offset);
+                const top = Math.floor(-height_offset);
+                const left = Math.floor(-width_offset);
                 img = img.extend({
                     top: top,
                     left: left,
@@ -611,13 +625,13 @@ export class RawImage {
 
         // Clone, and convert data to RGBA before drawing to canvas.
         // This is because the canvas API only supports RGBA
-        let cloned = this.clone().rgba();
+        const cloned = this.clone().rgba();
 
         // Create canvas object for the cloned image
-        let clonedCanvas = createCanvasFunction(cloned.width, cloned.height);
+        const clonedCanvas = createCanvasFunction(cloned.width, cloned.height);
 
         // Draw image to context
-        let data = new ImageDataClass(cloned.data, cloned.width, cloned.height);
+        const data = new ImageDataClass(cloned.data, cloned.width, cloned.height);
         clonedCanvas.getContext('2d').putImageData(data, 0, 0);
 
         return clonedCanvas;
@@ -728,4 +742,4 @@ export class RawImage {
             }
         });
     }
-}
+}
\ No newline at end of file
diff --git a/src/utils/maths.js b/src/utils/maths.js
index 319f4a347..e6cb2d6ca 100644
--- a/src/utils/maths.js
+++ b/src/utils/maths.js
@@ -190,27 +190,6 @@ export function dot(arr1, arr2) {
     return result;
 }
 
-
-/**
- * Get the top k items from an iterable, sorted by descending order
- * @param {any[]|TypedArray} items The items to be sorted
- * @param {number|null} [top_k=0] The number of top items to return (default: 0 = return all)
- * @returns {[number, any][]} The top k items, sorted by descending order
- */
-export function getTopItems(items, top_k = 0) {
-    // if top == 0, return all
-
-    items = Array.from(items)
-        .map((x, i) => [i, x])            // Get indices ([index, score])
-        .sort((a, b) => b[1] - a[1])      // Sort by log probabilities
-
-    if (top_k !== null && top_k > 0) {
-        items = items.slice(0, top_k);    // Get top k items
-    }
-
-    return items
-}
-
 /**
  * Computes the cosine similarity between two arrays.
  *
@@ -247,7 +226,7 @@ export function magnitude(arr) {
 /**
  * Returns the value and index of the minimum element in an array.
  * @param {number[]|TypedArray} arr array of numbers.
- * @returns {number[]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
+ * @returns {[number, number]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin]
  * @throws {Error} If array is empty.
  */
 export function min(arr) {
@@ -992,3 +971,89 @@ export function bankers_round(x) {
     const br = Math.abs(x) % 1 === 0.5 ? (r % 2 === 0 ? r : r - 1) : r;
     return br;
 }
+
+
+/**
+ * Measures similarity between two temporal sequences (e.g., input audio and output tokens
+ * to generate token-level timestamps).
+ * @param {number[][]} matrix 
+ * @returns {number[][]}
+ */
+export function dynamic_time_warping(matrix) {
+    const output_length = matrix.length;
+    const input_length = matrix[0].length;
+
+    const outputShape = [output_length + 1, input_length + 1];
+
+    const cost = Array.from(
+        { length: outputShape[0] },
+        () => Array(outputShape[1]).fill(Infinity)
+    );
+    cost[0][0] = 0;
+
+    const trace = Array.from(
+        { length: outputShape[0] },
+        () => Array(outputShape[1]).fill(-1)
+    );
+
+    for (let j = 1; j < outputShape[1]; ++j) {
+        for (let i = 1; i < outputShape[0]; ++i) {
+            const c0 = cost[i - 1][j - 1];
+            const c1 = cost[i - 1][j];
+            const c2 = cost[i][j - 1];
+
+            let c, t;
+            if (c0 < c1 && c0 < c2) {
+                c = c0;
+                t = 0;
+            } else if (c1 < c0 && c1 < c2) {
+                c = c1;
+                t = 1;
+            } else {
+                c = c2;
+                t = 2;
+            }
+            cost[i][j] = matrix[i - 1][j - 1] + c;
+            trace[i][j] = t;
+        }
+    }
+
+    for (let i = 0; i < outputShape[1]; ++i) { // trace[0, :] = 2
+        trace[0][i] = 2;
+    }
+    for (let i = 0; i < outputShape[0]; ++i) { // trace[:, 0] = 1
+        trace[i][0] = 1;
+    }
+
+    // backtrace
+    let i = output_length;
+    let j = input_length;
+    let text_indices = [];
+    let time_indices = [];
+    while (i > 0 || j > 0) {
+        text_indices.push(i - 1);
+        time_indices.push(j - 1);
+
+        switch (trace[i][j]) {
+            case 0:
+                --i; --j;
+                break;
+            case 1:
+                --i;
+                break;
+            case 2:
+                --j;
+                break;
+            default:
+                throw new Error(
+                    `Internal error in dynamic time warping. Unexpected trace[${i}, ${j}]. Please file a bug report.`
+                )
+        }
+    }
+
+    text_indices.reverse();
+    time_indices.reverse();
+
+    return [text_indices, time_indices];
+
+}
diff --git a/src/utils/tensor.js b/src/utils/tensor.js
index 469054cac..536a8c249 100644
--- a/src/utils/tensor.js
+++ b/src/utils/tensor.js
@@ -1,22 +1,26 @@
 /**
  * @file Helper module for `Tensor` processing.
- * 
- * These functions and classes are only used internally, 
+ *
+ * These functions and classes are only used internally,
  * meaning an end-user shouldn't need to access anything here.
- * 
+ *
  * @module utils/tensor
  */
 
-import { ONNX } from '../backends/onnx.js';
-
 import {
     interpolate_data,
     permute_data
 } from './maths.js';
 
+import {
+    Tensor as ONNXTensor, isONNXTensor,
+} from '../backends/onnx.js';
+
+import { TensorOpRegistry } from '../ops/registry.js';
 
 const DataTypeMap = Object.freeze({
     float32: Float32Array,
+    float16: Uint16Array,
     float64: Float64Array,
     string: Array, // string[]
     int8: Int8Array,
@@ -35,37 +39,55 @@ const DataTypeMap = Object.freeze({
  * @typedef {import('./maths.js').AnyTypedArray | any[]} DataArray
  */
 
-const ONNXTensor = ONNX.Tensor;
 
 export class Tensor {
     /** @type {number[]} Dimensions of the tensor. */
-    dims;
+    get dims() {
+        // @ts-ignore
+        return this.ort_tensor.dims;
+    }
+    set dims(value) {
+        // FIXME: ONNXTensor declares dims as readonly so one needs to use the constructor() if dims change.
+        // @ts-ignore
+        this.ort_tensor.dims = value;
+    }
 
     /** @type {DataType} Type of the tensor. */
-    type;
+    get type() {
+        return this.ort_tensor.type;
+    };
 
     /** @type {DataArray} The data stored in the tensor. */
-    data;
+    get data() {
+        return this.ort_tensor.data;
+    }
 
     /** @type {number} The number of elements in the tensor. */
-    size;
+    get size() {
+        return this.ort_tensor.size;
+    };
+
+    /** @type {string} The location of the tensor data. */
+    get location() {
+        return this.ort_tensor.location;
+    };
+
+    ort_tensor;
 
     /**
      * Create a new Tensor or copy an existing Tensor.
-     * @param {[DataType, DataArray, number[]]|[import('onnxruntime-common').Tensor]} args
+     * @param {[DataType, DataArray, number[]]|[ONNXTensor]} args
      */
     constructor(...args) {
-        if (args[0] instanceof ONNXTensor) {
-            // Create shallow copy
-            Object.assign(this, args[0]);
-
+        if (isONNXTensor(args[0])) {
+            this.ort_tensor = /** @type {ONNXTensor} */ (args[0]);
         } else {
             // Create new tensor
-            Object.assign(this, new ONNXTensor(
+            this.ort_tensor = new ONNXTensor(
                 /** @type {DataType} */(args[0]),
                 /** @type {Exclude<import('./maths.js').AnyTypedArray, Uint8ClampedArray>} */(args[1]),
                 args[2]
-            ));
+            );
         }
 
         return new Proxy(this, {
@@ -89,6 +111,11 @@ export class Tensor {
         });
     }
 
+    dispose() {
+        this.ort_tensor.dispose();
+        // this.ort_tensor = undefined;
+    }
+
     /**
      * Returns an iterator object for iterating over the tensor data in row-major order.
      * If the tensor has more than one dimension, the iterator will yield subarrays.
@@ -131,9 +158,10 @@ export class Tensor {
      * @returns {number} The index of the first occurrence of item in the tensor data.
      */
     indexOf(item) {
-        for (let index = 0; index < this.data.length; ++index) {
+        const this_data = this.data;
+        for (let index = 0; index < this_data.length; ++index) {
             // Note: == instead of === so we can match Ints with BigInts
-            if (this.data[index] == item) {
+            if (this_data[index] == item) {
                 return index;
             }
         }
@@ -141,9 +169,9 @@ export class Tensor {
     }
 
     /**
-     * @param {number} index 
-     * @param {number} iterSize 
-     * @param {any} iterDims 
+     * @param {number} index
+     * @param {number} iterSize
+     * @param {any} iterDims
      * @returns {Tensor}
      */
     _subarray(index, iterSize, iterDims) {
@@ -165,10 +193,11 @@ export class Tensor {
      * @throws {Error} If the tensor has more than one element.
      */
     item() {
-        if (this.data.length !== 1) {
-            throw new Error(`a Tensor with ${this.data.length} elements cannot be converted to Scalar`);
+        const this_data = this.data;
+        if (this_data.length !== 1) {
+            throw new Error(`a Tensor with ${this_data.length} elements cannot be converted to Scalar`);
         }
-        return this.data[0];
+        return this_data[0];
     }
 
     /**
@@ -192,8 +221,33 @@ export class Tensor {
      * @returns {Tensor} Returns `this`.
      */
     sigmoid_() {
-        for (let i = 0; i < this.data.length; ++i) {
-            this.data[i] = 1 / (1 + Math.exp(-this.data[i]));
+        const this_data = this.data;
+        for (let i = 0; i < this_data.length; ++i) {
+            this_data[i] = 1 / (1 + Math.exp(-this_data[i]));
+        }
+        return this;
+    }
+
+    /**
+     * Return a new Tensor with a callback function applied to each element.
+     * @param {Function} callback - The function to apply to each element. It should take three arguments:
+     *                              the current element, its index, and the tensor's data array.
+     * @returns {Tensor} A new Tensor with the callback function applied to each element.
+     */
+    map(callback) {
+        return this.clone().map_(callback);
+    }
+
+    /**
+     * Apply a callback function to each element of the tensor in place.
+     * @param {Function} callback - The function to apply to each element. It should take three arguments:
+     *                              the current element, its index, and the tensor's data array.
+     * @returns {Tensor} Returns `this`.
+     */
+    map_(callback) {
+        const this_data = this.data;
+        for (let i = 0; i < this_data.length; ++i) {
+            this_data[i] = callback(this_data[i], i, this_data);
         }
         return this;
     }
@@ -213,12 +267,34 @@ export class Tensor {
      * @returns {Tensor} Returns `this`.
      */
     mul_(val) {
-        for (let i = 0; i < this.data.length; ++i) {
-            this.data[i] *= val;
+        const this_data = this.data;
+        for (let i = 0; i < this_data.length; ++i) {
+            this_data[i] *= val;
         }
         return this;
     }
 
+    /**
+     * Return a new Tensor with every element divided by a constant.
+     * @param {number} val The value to divide by.
+     * @returns {Tensor} The new tensor.
+     */
+    div(val) {
+        return this.clone().div_(val);
+    }
+
+    /**
+     * Divide the tensor by a constant in place.
+     * @param {number} val The value to divide by.
+     * @returns {Tensor} Returns `this`.
+     */
+    div_(val) {
+        const this_data = this.data;
+        for (let i = 0; i < this_data.length; ++i) {
+            this_data[i] /= val;
+        }
+        return this;
+    }
 
     /**
      * Return a new Tensor with every element added by a constant.
@@ -235,19 +311,43 @@ export class Tensor {
      * @returns {Tensor} Returns `this`.
      */
     add_(val) {
-        for (let i = 0; i < this.data.length; ++i) {
-            this.data[i] += val;
+        const this_data = this.data;
+        for (let i = 0; i < this_data.length; ++i) {
+            this_data[i] += val;
         }
         return this;
     }
+
+    /**
+     * Return a new Tensor with every element subtracted by a constant.
+     * @param {number} val The value to subtract by.
+     * @returns {Tensor} The new tensor.
+     */
+    sub(val) {
+        return this.clone().sub_(val);
+    }
+
+    /**
+     * Subtract the tensor by a constant in place.
+     * @param {number} val The value to subtract by.
+     * @returns {Tensor} Returns `this`.
+     */
+    sub_(val) {
+        const this_data = this.data;
+        for (let i = 0; i < this_data.length; ++i) {
+            this_data[i] -= val;
+        }
+        return this;
+    }
+
     clone() {
         return new Tensor(this.type, this.data.slice(), this.dims.slice());
     }
 
     slice(...slices) {
         // This allows for slicing with ranges and numbers
-        let newTensorDims = [];
-        let newOffsets = [];
+        const newTensorDims = [];
+        const newOffsets = [];
 
         // slices is an array of numbers or arrays of numbers
         // e.g., slices = [0, [1, 3], null, [0, 3]]
@@ -267,14 +367,21 @@ export class Tensor {
 
             } else if (Array.isArray(slice) && slice.length === 2) {
                 // An array of length 2 means take a range of elements
-
-                if (slice[0] > slice[1]) {
+                let [start, end] = slice;
+                start = start === null
+                    ? 0
+                    : safeIndex(start, this.dims[sliceIndex], sliceIndex, false);
+                end = end === null
+                    ? this.dims[sliceIndex]
+                    : safeIndex(end, this.dims[sliceIndex], sliceIndex, false);
+
+                if (start > end) {
                     throw new Error(`Invalid slice: ${slice}`);
                 }
 
-                let offsets = [
-                    Math.max(slice[0], 0),
-                    Math.min(slice[1], this.dims[sliceIndex])
+                const offsets = [
+                    Math.max(start, 0),
+                    Math.min(end, this.dims[sliceIndex])
                 ];
 
                 newOffsets.push(offsets);
@@ -285,12 +392,13 @@ export class Tensor {
             }
         }
 
-        let newDims = newOffsets.map(([start, end]) => end - start);
-        let newBufferSize = newDims.reduce((a, b) => a * b);
+        const newDims = newOffsets.map(([start, end]) => end - start);
+        const newBufferSize = newDims.reduce((a, b) => a * b);
 
+        const this_data = this.data;
         // Allocate memory
         // @ts-ignore
-        let data = new this.data.constructor(newBufferSize);
+        const data = new this_data.constructor(newBufferSize);
 
         // Precompute strides
         const stride = this.stride();
@@ -302,7 +410,7 @@ export class Tensor {
                 originalIndex += ((num % size) + newOffsets[j][0]) * stride[j];
                 num = Math.floor(num / size);
             }
-            data[i] = this.data[originalIndex];
+            data[i] = this_data[originalIndex];
         }
         return new Tensor(this.type, data, newTensorDims);
 
@@ -326,7 +434,7 @@ export class Tensor {
 
     /**
      * Returns the sum of each row of the input tensor in the given dimension dim.
-     * 
+     *
      * @param {number} [dim=null] The dimension or dimensions to reduce. If `null`, all dimensions are reduced.
      * @param {boolean} keepdim Whether the output tensor has `dim` retained or not.
      * @returns The summed tensor
@@ -351,9 +459,11 @@ export class Tensor {
             throw Error(`Unsupported norm: ${p}`);
         }
 
+        const this_data = this.data;
+
         if (dim === null) {
             // @ts-ignore
-            let val = this.data.reduce((a, b) => a + (b ** p), 0) ** (1 / p);
+            let val = this_data.reduce((a, b) => a + (b ** p), 0) ** (1 / p);
             return new Tensor(this.type, [val], []);
         }
 
@@ -366,10 +476,10 @@ export class Tensor {
 
         // Create a new array to store the accumulated values
         // @ts-ignore
-        const result = new this.data.constructor(this.data.length / this.dims[dim]);
+        const result = new this_data.constructor(this_data.length / this.dims[dim]);
 
         // Iterate over the data array
-        for (let i = 0; i < this.data.length; ++i) {
+        for (let i = 0; i < this_data.length; ++i) {
 
             // Calculate the index in the resulting array
             let resultIndex = 0;
@@ -385,7 +495,7 @@ export class Tensor {
             }
 
             // Accumulate the value at the current index
-            result[resultIndex] += (this.data[i]) ** p;
+            result[resultIndex] += (this_data[i]) ** p;
         }
 
         if (p !== 1) {
@@ -412,7 +522,9 @@ export class Tensor {
 
         const norm = this.norm(p, dim, true);
 
-        for (let i = 0; i < this.data.length; ++i) {
+        const this_data = this.data;
+        const norm_data = norm.data;
+        for (let i = 0; i < this_data.length; ++i) {
 
             // Calculate the index in the resulting array
             let resultIndex = 0;
@@ -428,7 +540,7 @@ export class Tensor {
             }
 
             // Divide by normalized value
-            this.data[i] /= norm.data[resultIndex];
+            this_data[i] /= norm_data[resultIndex];
         }
 
         return this;
@@ -455,12 +567,12 @@ export class Tensor {
 
     /**
      * Returns a tensor with all specified dimensions of input of size 1 removed.
-     * 
+     *
      * NOTE: The returned tensor shares the storage with the input tensor, so changing the contents of one will change the contents of the other.
      * If you would like a copy, use `tensor.clone()` before squeezing.
-     * 
+     *
      * @param {number} [dim=null] If given, the input will be squeezed only in the specified dimensions.
-     * @returns The squeezed tensor
+     * @returns {Tensor} The squeezed tensor
      */
     squeeze(dim = null) {
         return new Tensor(
@@ -480,11 +592,11 @@ export class Tensor {
 
     /**
      * Returns a new tensor with a dimension of size one inserted at the specified position.
-     * 
+     *
      * NOTE: The returned tensor shares the same underlying data with this tensor.
-     * 
+     *
      * @param {number} dim The index at which to insert the singleton dimension
-     * @returns The unsqueezed tensor
+     * @returns {Tensor} The unsqueezed tensor
      */
     unsqueeze(dim = null) {
         return new Tensor(
@@ -523,7 +635,7 @@ export class Tensor {
      * and ending with `end_dim` are flattened. The order of elements in input is unchanged.
      * @param {number} start_dim the first dim to flatten
      * @param {number} end_dim the last dim to flatten
-     * @returns The flattened tensor.
+     * @returns {Tensor} The flattened tensor.
      */
     flatten(start_dim = 0, end_dim = -1) {
         return this.clone().flatten_(start_dim, end_dim);
@@ -546,20 +658,22 @@ export class Tensor {
             }
         }
 
+        const this_data = this.data;
         if (inferredIndex !== -1) {
             // Some dimension must be inferred
             const productOther = dims.reduce((product, curr, index) => {
                 return index !== inferredIndex ? product * curr : product
             }, 1);
 
-            dims[inferredIndex] = this.data.length / productOther;
+            dims[inferredIndex] = this_data.length / productOther;
         }
-        return new Tensor(this.type, this.data, dims); // NOTE: uses same underlying storage
+        return new Tensor(this.type, this_data, dims); // NOTE: uses same underlying storage
     }
 
     neg_() {
-        for (let i = 0; i < this.data.length; ++i) {
-            this.data[i] = -this.data[i];
+        const this_data = this.data;
+        for (let i = 0; i < this_data.length; ++i) {
+            this_data[i] = -this_data[i];
         }
         return this;
     }
@@ -571,8 +685,9 @@ export class Tensor {
      * In-place version of @see {@link Tensor.clamp}
      */
     clamp_(min, max) {
-        for (let i = 0; i < this.data.length; ++i) {
-            this.data[i] = Math.min(Math.max(this.data[i], min), max);
+        const this_data = this.data;
+        for (let i = 0; i < this_data.length; ++i) {
+            this_data[i] = Math.min(Math.max(this_data[i], min), max);
         }
         return this;
     }
@@ -581,7 +696,7 @@ export class Tensor {
      * Clamps all elements in input into the range [ min, max ]
      * @param {number} min lower-bound of the range to be clamped to
      * @param {number} max upper-bound of the range to be clamped to
-     * @returns the output tensor.
+     * @returns {Tensor} the output tensor.
      */
     clamp(min, max) {
         return this.clone().clamp_(min, max);
@@ -591,20 +706,25 @@ export class Tensor {
      * In-place version of @see {@link Tensor.round}
      */
     round_() {
-        for (let i = 0; i < this.data.length; ++i) {
-            this.data[i] = Math.round(this.data[i]);
+        const this_data = this.data;
+        for (let i = 0; i < this_data.length; ++i) {
+            this_data[i] = Math.round(this_data[i]);
         }
         return this;
     }
 
     /**
      * Rounds elements of input to the nearest integer.
-     * @returns the output tensor.
+     * @returns {Tensor} the output tensor.
      */
     round() {
         return this.clone().round_();
     }
 
+    mean(dim = null, keepdim = false) {
+        return mean(this, dim, keepdim);
+    }
+
     /**
      * Performs Tensor dtype conversion.
      * @param {DataType} type The desired data type.
@@ -625,7 +745,7 @@ export class Tensor {
 
 /**
  * This creates a nested array of a given type and depth (see examples).
- * 
+ *
  * @example
  *   NestArray<string, 1>; // string[]
  * @example
@@ -718,6 +838,105 @@ export function interpolate(input, [out_height, out_width], mode = 'bilinear', a
     return new Tensor(input.type, output, [in_channels, out_height, out_width]);
 }
 
+
+/**
+ * Down/up samples the input.
+ * Inspired by https://pytorch.org/docs/stable/generated/torch.nn.functional.interpolate.html.
+ * @param {Tensor} input the input tensor
+ * @param {Object} options the options for the interpolation
+ * @param {[number, number]|[number, number, number]|[number, number, number, number]} [options.size=null] output spatial size.
+ * @param {"bilinear"|"bicubic"} [options.mode='bilinear'] algorithm used for upsampling
+ * @returns {Promise<Tensor>} The interpolated tensor.
+ */
+export async function interpolate_4d(input, {
+    size = null,
+    mode = 'bilinear',
+} = {}) {
+
+    // Error checking
+    if (input.dims.length !== 4) {
+        throw new Error('`interpolate_4d` currently only supports 4D input.');
+    }
+    if (!size) {
+        // TODO: support scale_factor
+        throw new Error('`interpolate_4d` requires a `size` argument.');
+    }
+
+    // Fill in missing dimensions
+    let targetDims;
+    if (size.length === 2) {
+        targetDims = [...input.dims.slice(0, 2), ...size];
+    } else if (size.length === 3) {
+        targetDims = [input.dims[0], ...size];
+    } else if (size.length === 4) {
+        targetDims = size;
+    } else {
+        throw new Error('`size` must be of length 2, 3, or 4.');
+    }
+
+    let op;
+    if (mode === 'bilinear') {
+        op = await TensorOpRegistry.bilinear_interpolate_4d;
+    } else if (mode === 'bicubic') {
+        op = await TensorOpRegistry.bicubic_interpolate_4d;
+    } else {
+        throw new Error(`Unsupported mode: ${mode}`);
+    }
+
+    const sizeTensor = new Tensor('int64', new BigInt64Array(targetDims.map(BigInt)), [targetDims.length]);
+    return await op({ x: input, s: sizeTensor });
+}
+
+/**
+ * Matrix product of two tensors.
+ * Inspired by https://pytorch.org/docs/stable/generated/torch.matmul.html
+ * @param {Tensor} a the first tensor to be multiplied
+ * @param {Tensor} b the second tensor to be multiplied
+ * @returns {Promise<Tensor>} The matrix product of the two tensors.
+ */
+export async function matmul(a, b) {
+    const op = await TensorOpRegistry.matmul;
+    return await op({ a, b });
+}
+
+/**
+ * Computes the one dimensional Fourier transform of real-valued input.
+ * Inspired by https://pytorch.org/docs/stable/generated/torch.fft.rfft.html
+ * @param {Tensor} x the real input tensor
+ * @param {Tensor} a The dimension along which to take the one dimensional real FFT.
+ * @returns {Promise<Tensor>} the output tensor.
+ */
+export async function rfft(x, a) {
+    const op = await TensorOpRegistry.rfft;
+    return await op({ x, a });
+}
+
+
+/**
+ * Returns the k largest elements of the given input tensor.
+ * Inspired by https://pytorch.org/docs/stable/generated/torch.topk.html
+ * @param {Tensor} x the input tensor
+ * @param {number} k the k in "top-k"
+ * @returns {Promise<[Tensor, Tensor]>} the output tuple of (Tensor, LongTensor) of top-k elements and their indices.
+ */
+export async function topk(x, k) {
+    const op = await TensorOpRegistry.top_k;
+
+    if (k === null) {
+        k = x.dims.at(-1);
+    } else {
+        k = Math.min(k, x.dims.at(-1));
+    }
+    return await op({
+        x,
+        k: new Tensor(
+            'int64',
+            [BigInt(k)],
+            [1]
+        )
+    });
+}
+
 /**
  * Perform mean pooling of the last hidden state followed by a normalization step.
  * @param {Tensor} last_hidden_state Tensor of shape [batchSize, seqLength, embedDim]
@@ -727,32 +946,35 @@ export function interpolate(input, [out_height, out_width], mode = 'bilinear', a
 export function mean_pooling(last_hidden_state, attention_mask) {
     // last_hidden_state: [batchSize, seqLength, embedDim]
     // attention_mask:    [batchSize, seqLength]
+    const lastHiddenStateData = last_hidden_state.data;
+    const attentionMaskData = attention_mask.data;
+
+    const shape = [last_hidden_state.dims[0], last_hidden_state.dims[2]];
 
-    let shape = [last_hidden_state.dims[0], last_hidden_state.dims[2]];
     // @ts-ignore
-    let returnedData = new last_hidden_state.data.constructor(shape[0] * shape[1]);
-    let [batchSize, seqLength, embedDim] = last_hidden_state.dims;
+    const returnedData = new lastHiddenStateData.constructor(shape[0] * shape[1]);
+    const [batchSize, seqLength, embedDim] = last_hidden_state.dims;
 
     let outIndex = 0;
     for (let i = 0; i < batchSize; ++i) {
-        let offset = i * embedDim * seqLength;
+        const offset = i * embedDim * seqLength;
 
         for (let k = 0; k < embedDim; ++k) {
             let sum = 0;
             let count = 0;
 
-            let attnMaskOffset = i * seqLength;
-            let offset2 = offset + k;
+            const attnMaskOffset = i * seqLength;
+            const offset2 = offset + k;
             // Pool over all words in sequence
             for (let j = 0; j < seqLength; ++j) {
                 // index into attention mask
-                let attn = Number(attention_mask.data[attnMaskOffset + j]);
+                const attn = Number(attentionMaskData[attnMaskOffset + j]);
 
                 count += attn;
-                sum += last_hidden_state.data[offset2 + j * embedDim] * attn;
+                sum += lastHiddenStateData[offset2 + j * embedDim] * attn;
             }
 
-            let avg = sum / count;
+            const avg = sum / count;
             returnedData[outIndex++] = avg;
         }
     }
@@ -786,15 +1008,19 @@ export function layer_norm(input, normalized_shape, {
     }
 
     const [std, mean] = std_mean(input, 1, 0, true);
+    const stdData = /** @type {Float32Array} */(std.data);
+    const meanData = /** @type {Float32Array} */(mean.data);
+
+    const inputData = /** @type {Float32Array} */(input.data);
 
     // @ts-ignore
-    const returnedData = new input.data.constructor(input.data.length);
+    const returnedData = new inputData.constructor(inputData.length);
 
     for (let i = 0; i < batchSize; ++i) {
         const offset = i * featureDim;
         for (let j = 0; j < featureDim; ++j) {
             const offset2 = offset + j;
-            returnedData[offset2] = (input.data[offset2] - mean.data[i]) / (std.data[i] + eps);
+            returnedData[offset2] = (inputData[offset2] - meanData[i]) / (stdData[i] + eps);
         }
     }
     return new Tensor(input.type, returnedData, input.dims);
@@ -804,7 +1030,7 @@ export function layer_norm(input, normalized_shape, {
  * Helper function to calculate new dimensions when performing a squeeze operation.
  * @param {number[]} dims The dimensions of the tensor.
  * @param {number|number[]|null} dim The dimension(s) to squeeze.
- * @returns The new dimensions.
+ * @returns {number[]} The new dimensions.
  * @private
  */
 function calc_squeeze_dims(dims, dim) {
@@ -827,7 +1053,7 @@ function calc_squeeze_dims(dims, dim) {
  * Helper function to calculate new dimensions when performing an unsqueeze operation.
  * @param {number[]} dims The dimensions of the tensor.
  * @param {number} dim The dimension to unsqueeze.
- * @returns The new dimensions.
+ * @returns {number[]} The new dimensions.
  * @private
  */
 function calc_unsqueeze_dims(dims, dim) {
@@ -846,12 +1072,12 @@ function calc_unsqueeze_dims(dims, dim) {
  * @param {number} size The size of the array.
  * @param {number} [dimension=null] The dimension that the index is for (optional).
  * @returns {number} The index, guaranteed to be non-negative and less than `arrayLength`.
- * 
+ *
  * @throws {Error} If the index is out of range.
  * @private
  */
-function safeIndex(index, size, dimension = null) {
-    if (index < -size || index >= size) {
+function safeIndex(index, size, dimension = null, boundsCheck = true) {
+    if (boundsCheck && (index < -size || index >= size)) {
         throw new Error(`IndexError: index ${index} is out of bounds for dimension${dimension === null ? '' : ' ' + dimension} with size ${size}`);
     }
 
@@ -888,9 +1114,10 @@ export function cat(tensors, dim = 0) {
         // Handle special case for performance reasons
 
         let offset = 0;
-        for (let t of tensors) {
-            result.set(t.data, offset);
-            offset += t.data.length;
+        for (const tensor of tensors) {
+            const tensorData = tensor.data;
+            result.set(tensorData, offset);
+            offset += tensorData.length;
         }
 
     } else {
@@ -898,15 +1125,15 @@ export function cat(tensors, dim = 0) {
         let currentDim = 0;
 
         for (let t = 0; t < tensors.length; ++t) {
-            let tensor = tensors[t];
+            const { data, dims } = tensors[t];
 
             // Iterate over the data array
-            for (let i = 0; i < tensor.data.length; ++i) {
+            for (let i = 0; i < data.length; ++i) {
                 // Calculate the index in the resulting array
                 let resultIndex = 0;
 
-                for (let j = tensor.dims.length - 1, num = i, resultMultiplier = 1; j >= 0; --j) {
-                    const size = tensor.dims[j];
+                for (let j = dims.length - 1, num = i, resultMultiplier = 1; j >= 0; --j) {
+                    const size = dims[j];
                     let index = num % size;
                     if (j === dim) {
                         index += currentDim;
@@ -916,10 +1143,10 @@ export function cat(tensors, dim = 0) {
                     num = Math.floor(num / size);
                 }
                 // Accumulate the value at the current index
-                result[resultIndex] = tensor.data[i];
+                result[resultIndex] = data[i];
             }
 
-            currentDim += tensor.dims[dim];
+            currentDim += dims[dim];
         }
     }
     return new Tensor(resultType, result, resultDims);
@@ -947,14 +1174,14 @@ export function stack(tensors, dim = 0) {
  * @returns {Tensor[]} A tuple of (std, mean) tensors.
  */
 export function std_mean(input, dim = null, correction = 1, keepdim = false) {
+    const inputData = /** @type {Float32Array} */(input.data);
+    const inputDims = input.dims;
 
     if (dim === null) {
         // None to reduce over all dimensions.
-        // @ts-ignore
-        const sum = input.data.reduce((a, b) => a + b, 0);
-        const mean = sum / input.data.length;
-        // @ts-ignore
-        const std = Math.sqrt(input.data.reduce((a, b) => a + (b - mean) ** 2, 0) / (input.data.length - correction));
+        const sum = inputData.reduce((a, b) => a + b, 0);
+        const mean = sum / inputData.length;
+        const std = Math.sqrt(inputData.reduce((a, b) => a + (b - mean) ** 2, 0) / (inputData.length - correction));
 
         const meanTensor = new Tensor(input.type, [mean], [/* scalar */]);
         const stdTensor = new Tensor(input.type, [std], [/* scalar */]);
@@ -963,26 +1190,27 @@ export function std_mean(input, dim = null, correction = 1, keepdim = false) {
     }
 
     // Negative indexing
-    dim = safeIndex(dim, input.dims.length);
+    dim = safeIndex(dim, inputDims.length);
 
     const meanTensor = mean(input, dim, keepdim);
+    const meanTensorData = meanTensor.data;
 
     // Calculate the shape of the resulting array after summation
-    const resultDims = input.dims.slice(); // Copy the original dimensions
+    const resultDims = inputDims.slice(); // Copy the original dimensions
     resultDims[dim] = 1; // Remove the specified axis
 
     // Create a new array to store the accumulated values
     // @ts-ignore
-    const result = new input.data.constructor(input.data.length / input.dims[dim]);
+    const result = new inputData.constructor(inputData.length / inputDims[dim]);
 
     // Iterate over the data array
-    for (let i = 0; i < input.data.length; ++i) {
+    for (let i = 0; i < inputData.length; ++i) {
 
         // Calculate the index in the resulting array
         let resultIndex = 0;
 
-        for (let j = input.dims.length - 1, num = i, resultMultiplier = 1; j >= 0; --j) {
-            const size = input.dims[j];
+        for (let j = inputDims.length - 1, num = i, resultMultiplier = 1; j >= 0; --j) {
+            const size = inputDims[j];
             if (j !== dim) {
                 const index = num % size;
                 resultIndex += index * resultMultiplier;
@@ -992,11 +1220,11 @@ export function std_mean(input, dim = null, correction = 1, keepdim = false) {
         }
 
         // Accumulate the value at the current index
-        result[resultIndex] += (input.data[i] - meanTensor.data[resultIndex]) ** 2;
+        result[resultIndex] += (inputData[i] - meanTensorData[resultIndex]) ** 2;
     }
 
     for (let i = 0; i < result.length; ++i) {
-        result[i] = Math.sqrt(result[i] / (input.dims[dim] - correction));
+        result[i] = Math.sqrt(result[i] / (inputDims[dim] - correction));
     }
 
     if (!keepdim) {
@@ -1014,36 +1242,38 @@ export function std_mean(input, dim = null, correction = 1, keepdim = false) {
  * @param {Tensor} input the input tensor.
  * @param {number|null} dim the dimension to reduce.
  * @param {boolean} keepdim whether the output tensor has dim retained or not.
- * @returns A new tensor with means taken along the specified dimension.
+ * @returns {Tensor} A new tensor with means taken along the specified dimension.
  */
 export function mean(input, dim = null, keepdim = false) {
+    const inputData = /** @type {Float32Array} */(input.data);
 
     if (dim === null) {
         // None to reduce over all dimensions.
         // @ts-ignore
-        let val = input.data.reduce((a, b) => a + b, 0);
-        return new Tensor(input.type, [val / input.data.length], [/* scalar */]);
+        const val = inputData.reduce((a, b) => a + b, 0);
+        return new Tensor(input.type, [val / inputData.length], [/* scalar */]);
     }
+    const inputDims = input.dims;
 
     // Negative indexing
-    dim = safeIndex(dim, input.dims.length);
+    dim = safeIndex(dim, inputDims.length);
 
     // Calculate the shape of the resulting array after summation
-    const resultDims = input.dims.slice(); // Copy the original dimensions
+    const resultDims = inputDims.slice(); // Copy the original dimensions
     resultDims[dim] = 1; // Remove the specified axis
 
     // Create a new array to store the accumulated values
     // @ts-ignore
-    const result = new input.data.constructor(input.data.length / input.dims[dim]);
+    const result = new inputData.constructor(inputData.length / inputDims[dim]);
 
     // Iterate over the data array
-    for (let i = 0; i < input.data.length; ++i) {
+    for (let i = 0; i < inputData.length; ++i) {
 
         // Calculate the index in the resulting array
         let resultIndex = 0;
 
-        for (let j = input.dims.length - 1, num = i, resultMultiplier = 1; j >= 0; --j) {
-            const size = input.dims[j];
+        for (let j = inputDims.length - 1, num = i, resultMultiplier = 1; j >= 0; --j) {
+            const size = inputDims[j];
             if (j !== dim) {
                 const index = num % size;
                 resultIndex += index * resultMultiplier;
@@ -1053,12 +1283,12 @@ export function mean(input, dim = null, keepdim = false) {
         }
 
         // Accumulate the value at the current index
-        result[resultIndex] += input.data[i];
+        result[resultIndex] += inputData[i];
     }
 
-    if (input.dims[dim] !== 1) {
+    if (inputDims[dim] !== 1) {
         for (let i = 0; i < result.length; ++i) {
-            result[i] = result[i] / input.dims[dim];
+            result[i] = result[i] / inputDims[dim];
         }
     }
 
@@ -1070,99 +1300,6 @@ export function mean(input, dim = null, keepdim = false) {
 }
 
 
-/**
- *
- * Measures similarity between two temporal sequences (e.g., input audio and output tokens
- * to generate token-level timestamps).
- * @param {Tensor} matrix 
- * @returns {number[][]}
- */
-export function dynamicTimeWarping(matrix) {
-    const [output_length, input_length] = matrix.dims;
-
-    const outputShape = [output_length + 1, input_length + 1];
-
-    const cost = new Tensor(
-        'float32',
-        new Float32Array(outputShape[0] * outputShape[1]).fill(Infinity),
-        outputShape
-    );
-
-    const trace = new Tensor(
-        'float32',
-        new Float32Array(outputShape[0] * outputShape[1]).fill(-1),
-        outputShape
-    )
-
-    // same as `cost[0][0] = 0`;
-    cost[0].data[0] = 0;
-
-    for (let j = 1; j < input_length + 1; ++j) {
-        for (let i = 1; i < output_length + 1; ++i) {
-
-            const c0 = cost[i - 1][j - 1].item();
-            const c1 = cost[i - 1][j].item();
-            const c2 = cost[i][j - 1].item();
-
-            let c, t;
-            if (c0 < c1 && c0 < c2) {
-                c = c0;
-                t = 0;
-            } else if (c1 < c0 && c1 < c2) {
-                c = c1;
-                t = 1;
-            } else {
-                c = c2;
-                t = 2;
-            }
-
-            cost[i].data[j] = matrix[i - 1][j - 1].item() + c;
-            trace[i].data[j] = t;
-        }
-    }
-
-    // backtrace
-    let i = output_length;
-    let j = input_length;
-
-    // @ts-ignore
-    trace.data.fill(2, 0, outputShape[1]) // trace[0, :] = 2
-    for (let i = 0; i < outputShape[0]; ++i) { // trace[:, 0] = 1
-        trace[i].data[0] = 1;
-    }
-
-    let text_indices = [];
-    let time_indices = [];
-
-    while (i > 0 || j > 0) {
-        text_indices.push(i - 1);
-        time_indices.push(j - 1);
-
-        const t = trace[i][j].item();
-        switch (t) {
-            case 0:
-                --i; --j;
-                break;
-            case 1:
-                --i;
-                break;
-            case 2:
-                --j;
-                break;
-            default:
-                throw new Error(
-                    `Internal error in dynamic time warping. Unexpected trace[${i}, ${j}]. Please file a bug report.`
-                )
-        }
-    }
-
-    text_indices.reverse();
-    time_indices.reverse();
-
-    return [text_indices, time_indices];
-
-}
-
 function dimsToStride(dims) {
     const stride = new Array(dims.length);
     for (let i = dims.length - 1, s2 = 1; i >= 0; --i) {
@@ -1172,28 +1309,77 @@ function dimsToStride(dims) {
     return stride;
 }
 
+function fullHelper(size, fill_value, dtype, cls) {
+    const numElements = size.reduce((a, b) => a * b, 1);
+    return new Tensor(
+        dtype,
+        new cls(numElements).fill(fill_value),
+        size
+    )
+}
+
+/**
+ * Creates a tensor of size size filled with fill_value. The tensor's dtype is inferred from fill_value.
+ * @param {number[]} size A sequence of integers defining the shape of the output tensor.
+ * @param {number|bigint} fill_value The value to fill the output tensor with.
+ * @returns {Tensor} The filled tensor.
+ */
+export function full(size, fill_value) {
+    let dtype;
+    let typedArrayCls;
+    if (typeof fill_value === 'number') {
+        dtype = 'float32';
+        typedArrayCls = Float32Array;
+    } else if (typeof fill_value === 'bigint') {
+        dtype = 'int64';
+        typedArrayCls = BigInt64Array;
+    } else {
+        // TODO: support other dtypes
+        throw new Error(`Unsupported data type: ${typeof fill_value}`);
+    }
+    return fullHelper(size, fill_value, dtype, typedArrayCls);
+}
+
+export function full_like(tensor, fill_value) {
+    return full(tensor.dims, fill_value);
+}
+
 /**
  * Returns a tensor filled with the scalar value 1, with the shape defined by the variable argument size.
  * @param {number[]} size A sequence of integers defining the shape of the output tensor.
+ * @returns {Tensor} The ones tensor.
  */
 export function ones(size) {
-    const numElements = size.reduce((a, b) => a * b, 1);
-    return new Tensor(
-        'int64',
-        new BigInt64Array(numElements).fill(1n),
-        size
-    )
+    return fullHelper(size, 1n, 'int64', BigInt64Array);
 }
 
 /**
  * Returns a tensor filled with the scalar value 1, with the same size as input.
  * @param {Tensor} tensor The size of input will determine size of the output tensor.
- * @returns The ones tensor.
+ * @returns {Tensor} The ones tensor.
  */
 export function ones_like(tensor) {
     return ones(tensor.dims);
 }
 
+/**
+ * Returns a tensor filled with the scalar value 0, with the shape defined by the variable argument size.
+ * @param {number[]} size A sequence of integers defining the shape of the output tensor.
+ * @returns {Tensor} The zeros tensor.
+ */
+export function zeros(size) {
+    return fullHelper(size, 0n, 'int64', BigInt64Array);
+}
+
+/**
+ * Returns a tensor filled with the scalar value 0, with the same size as input.
+ * @param {Tensor} tensor The size of input will determine size of the output tensor.
+ * @returns {Tensor} The zeros tensor.
+ */
+export function zeros_like(tensor) {
+    return zeros(tensor.dims);
+}
+
 /**
  * Quantizes the embeddings tensor to binary or unsigned binary precision.
  * @param {Tensor} tensor The tensor to quantize.
diff --git a/tests/configs.test.js b/tests/configs.test.js
index 8cdfe28c7..f66a8a887 100644
--- a/tests/configs.test.js
+++ b/tests/configs.test.js
@@ -1,25 +1,23 @@
-
-
-import { AutoConfig, env } from '../src/transformers.js';
-import { getFile } from '../src/utils/hub.js';
-import { m } from './init.js';
+import { AutoConfig, env } from "../src/transformers.js";
+import { getFile } from "../src/utils/hub.js";
 
 // Initialise the testing environment
-env.allowLocalModels=false;
-env.useFSCache=false;
-
-// Load test data generated by the python tests
-// TODO do this dynamically?
-let testsData = await (await getFile('./tests/data/config_tests.json')).json()
-
-describe('Configs', () => {
+env.allowLocalModels = false;
+env.useFSCache = false;
 
-    for (let [configName, targetConfig] of Object.entries(testsData)) {
+const TEST_DATA = {
+  "Xenova/bert-base-uncased": {
+    model_type: "bert",
+  },
+};
 
-        it(configName, async () => {
-            let config = await AutoConfig.from_pretrained(m(configName));
-            expect(config.model_type).toEqual(targetConfig.model_type);
-            expect(config.is_encoder_decoder).toEqual(targetConfig.is_encoder_decoder);
-        });
-    }
+describe("Configs", () => {
+  for (const [model_id, minimal_config] of Object.entries(TEST_DATA)) {
+    it(model_id, async () => {
+      const config = await AutoConfig.from_pretrained(model_id);
+      for (const [key, value] of Object.entries(minimal_config)) {
+        expect(config[key]).toEqual(value);
+      }
+    });
+  }
 });
diff --git a/tests/data/.gitignore b/tests/data/.gitignore
deleted file mode 100644
index 5b8e8d398..000000000
--- a/tests/data/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-# Folder to store generated test data
-# Do not commit these files to the repository
-*.json
diff --git a/tests/generate_tests.py b/tests/generate_tests.py
deleted file mode 100644
index 3f103778b..000000000
--- a/tests/generate_tests.py
+++ /dev/null
@@ -1,467 +0,0 @@
-# Helper file to dynamically generate unit tests
-# This is done by running the python Transformers library and comparing its outputs with ours.
-
-import json
-import os
-from itertools import product
-
-from transformers import AutoTokenizer, AutoConfig
-import numpy as np
-
-from scripts.supported_models import SUPPORTED_MODELS
-
-# List of tokenizers where the model isn't yet supported, but the tokenizer is
-ADDITIONAL_TOKENIZERS_TO_TEST = {
-    'falcon': [
-        'tiiuae/falcon-7b',
-    ],
-    "llama": [
-        'hf-internal-testing/llama-tokenizer',  # Special tokens: normalized=true
-        'Xenova/llama2-tokenizer',  # Special tokens: normalized=false
-        'Xenova/llama2-chat-tokenizer',  # Special tokens: normalized=false
-        'hf-internal-testing/llama-code-tokenizer',
-
-        # TODO: add back when llama tests are fixed
-        # 'Xenova/llama3-tokenizer-new',  # PostProcessor type: Sequence
-    ],
-    'mpt': [
-        'mosaicml/mpt-7b',
-    ],
-    't5': [
-        # TODO: Add back when https://github.com/huggingface/transformers/issues/26318 is fixed
-        # 'Xenova/t5-tokenizer-new',
-    ],
-    'bert': [
-        # Uses `Whitespace` pretokenizer
-        'Xenova/jina-embeddings-v2-base-zh-tokenizer',
-    ],
-    'qwen2': [
-        # Uses a pretokenizer regex which is not compatible with JavaScript.
-        'Qwen/Qwen1.5-0.5B-Chat',
-    ],
-    'gemma': [
-        'Xenova/gemma-tokenizer',
-    ],
-}
-
-MODELS_TO_IGNORE = [
-    # TODO: remove when https://github.com/huggingface/tokenizers/issues/251 is fixed
-    'xlm',
-
-    # TODO: remove when https://github.com/huggingface/transformers/issues/26018 is fixed
-    'marian',
-
-    # TODO: remove when https://github.com/huggingface/transformers/issues/26547 is fixed
-    'speecht5',
-
-    # TODO: remove when https://github.com/huggingface/transformers/pull/26522 is merged
-    'siglip',
-
-    # TODO: remove when https://github.com/huggingface/transformers/issues/28164 is fixed
-    'roformer',
-
-    # TODO: remove when https://github.com/huggingface/transformers/issues/28173 is fixed. Issues include:
-    # - decoding with `skip_special_tokens=True`.
-    # - interspersing the pad token is broken.
-    'vits',
-]
-
-TOKENIZERS_TO_IGNORE = [
-    # TODO: remove when https://github.com/huggingface/transformers/pull/25478 is merged
-    'facebook/m2m100_418M',
-
-    # TODO: remove when https://github.com/huggingface/transformers/issues/28096 is addressed
-    'RajuKandasamy/tamillama_tiny_30m',
-
-    # Requires `trust_remote_code`
-    'monologg/kobert',
-]
-
-MAX_TESTS = {
-    'marian': 10,
-}
-
-TOKENIZER_TEST_DATA = {
-    "shared": [
-        "hello world",
-        "Hello World",
-        "How are you doing?",
-        "You should've done this",
-        "A\n'll !!to?'d''d of, can't.",
-        "def main():\n\tpass",
-        "This\n\nis\na\ntest.",
-        "let a = obj.toString();\ntoString();",
-        'Hi  Hello',
-        "trailing space   ",
-        "   leading space",
-        "生活的真谛是",
-        "The company was founded in 2016.",
-        "test $1 R2 #3 €4 £5 ¥6 ₣7 ₹8 ₱9 test",
-        "I bought an apple for $1.00 at the store.",
-        "you…  ",
-        "\u0079\u006F\u0075\u2026\u00A0\u00A0",
-        "\u0079\u006F\u0075\u2026\u00A0\u00A0\u0079\u006F\u0075\u2026\u00A0\u00A0",
-        "▁This ▁is ▁a ▁test ▁.",
-        "weird \uFF5E edge \uFF5E case",
-
-        # SentencePiece-specific test cases
-        "<s>\n",
-        " </s> test </s> ",
-        "</s>test</s>",
-
-        # Control characters
-        "1\u00002\uFFFD3",
-    ],
-    "custom_by_model_type": {
-        "llama": [
-            # Additional test-cases for the Llama tokenizer, adapted from
-            # https://github.com/belladoreai/llama-tokenizer-js/blob/master/llama-tokenizer.js#L381-L452
-            "grabbed",
-            " grabbed",
-            "           grabbed",
-            "\n",
-            " \n",
-            "	tabs				out here",
-            "\n\t\n",
-            "ax\n####\nboo",
-            "镇",
-            "🦙",
-            "🦙Ꙋ",
-            "Ꙋ🦙",
-            "The llama (/ˈlɑːmə/; 🦙Spanish pronunciation: [ˈʎama]) (Lama glama) is a domesticated South American " \
-            "camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era. Llamas " \
-            "are social animals and live with others as a herd. Their wool is soft and contains only a small " \
-            "amount of lanolin.[2] Llamas can learn simple tasks after a few repetitions. When using a pack, they " \
-            "can carry about 25 to 30% of their body weight for 8 to 13 km (5–8 miles).[3] The name llama (in the " \
-            "past also spelled \"lama\" or \"glama\") was adopted by European settlers from native Peruvians.[4] " \
-            "The ancestors of llamas are thought to have originated from the Great Plains of North America about " \
-            "40 million years ago, and subsequently migrated to South America about three million years ago during " \
-            "the Great American Interchange. By the end of the last ice age (10,000–12,000 years ago), camelids were " \
-            "extinct in North America.[3] As of 2007, there were over seven million llamas and alpacas in South " \
-            "America and over 158,000 llamas and 100,000Ꙋ🦙 alpacas, descended from progenitors imported late in " \
-            "the 20th century, in the United States and Canada.[5] In Aymara mythology, llamas are important beings. " \
-            "The Heavenly Llama is said to drink water from the ocean and urinates as it rains.[6] According to " \
-            "Aymara eschatology, llamas will return to the water springs and lagoons where they come from at the " \
-            "end of time.[6]",
-        ],
-
-        "vits": [
-            "abcdefghijklmnopqrstuvwxyz01234567890",
-            # Special treatment of characters in certain language
-            "ț ţ",
-        ],
-
-        "qwen2": [
-            "i'm i'M i've i've i'Ve i'vE i'VE",
-        ],
-    },
-    "custom": {
-        "facebook/blenderbot_small-90M": [
-            # Test special tokens
-            "__start__hello world__end__",
-            # The original (python) tokenizer simply joins by spaces (regardless of special tokens or not)
-            "__start__ hey __end__"  # --> ... --> "__start__ hey __end__"
-            "__start__hey __end__"  # --> ... --> "__start__ hey __end__"
-        ],
-        "tiiuae/falcon-7b": [
-            "12 and 123 and 1234",  # Special case for splitting on 3 numbers
-        ],
-        "InstaDeepAI/nucleotide-transformer-500m-human-ref": [
-            # Actual protein sequences
-            "ATTCCGATTCCGATTCCG",
-            "ATTTCTCTCTCTCTCTGAGATCGATCGATCGAT",
-
-            # Special tokens
-            "<unk><pad><mask><cls><eos><bos>",
-        ],
-
-        "distil-whisper/distil-small.en": [
-            "   <|startoftranscript|> <|en|>   ",  # Tests lstrip+rstrip
-        ],
-
-        "Xenova/t5-tokenizer-new": [
-            # Tests the new T5 tokenizer, which uses a different prepend_scheme for its pre_tokenizer:
-            # tokenizer._tokenizer.pre_tokenizer = Metaspace(add_prefix_space = True, replacement = "▁", prepend_scheme = "first")
-            # See https://github.com/huggingface/transformers/pull/26678 for more information.
-            #  - Old (incorrect): ['▁Hey', '▁', '</s>', '▁', '.', '▁how', '▁are', '▁you']
-            #  - New (correct):   ['▁Hey', '▁', '</s>', '.', '▁how', '▁are', '▁you']
-            "Hey </s>. how are you",
-        ],
-    },
-}
-
-TOKENIZER_TEXT_PAIR_TEST_DATA = [
-    {
-        'text': 'a',
-        'text_pair': 'b'
-    },
-    {
-        'text': 'a b',
-        'text_pair': 'c d e'
-    },
-    {
-        'text': ['a b c', 'd'],
-        'text_pair': ['e f', 'g h'],
-    },
-    {
-        'text': ['a', 'b c', 'd e f'],
-        'text_pair': ['g h i', 'j k', 'l'],
-    }
-]
-
-CHAT_MESSAGES_EXAMPLES = {
-    'basic': [
-        {"role": "user", "content": "Hello, how are you?"},
-        {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
-        {"role": "user", "content": "I'd like to show off how chat templating works!"},
-    ],
-
-    'system': [
-        {"role": "system", "content": "You are a friendly chatbot who always responds in the style of a pirate"},
-        {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
-    ],
-
-    'system + assistant': [
-        {"role": "system", "content": "You are a friendly chatbot who always responds in the style of a pirate"},
-        {"role": "user", "content": "Hello, how are you?"},
-        {"role": "assistant", "content": "I'm doing great. How can I help you today?"},
-        {"role": "user", "content": "I'd like to show off how chat templating works!"},
-    ],
-}
-
-TOKENIZERS_WITH_CHAT_TEMPLATES = {
-    # https://huggingface.co/docs/transformers/main/en/chat_templating
-    'Xenova/mistral-tokenizer-v1': [
-        'basic',
-    ],
-
-    'HuggingFaceH4/zephyr-7b-beta': [
-        'system',
-    ],
-
-    'Xenova/llama2-chat-tokenizer': [
-        'basic',
-        'system',
-        'system + assistant',
-    ],
-}
-
-
-FLATTENED_SUPPORTED_MODELS = [
-    (model_type, [
-        model for task_models in tasks.values() for model in task_models
-    ]) for model_type, tasks in SUPPORTED_MODELS.items()
-]
-
-
-def generate_tokenizer_tests():
-
-    tokenization_results = {}
-
-    tokenizers_to_test = FLATTENED_SUPPORTED_MODELS + \
-        list(ADDITIONAL_TOKENIZERS_TO_TEST.items())
-
-    for model_type, tokenizer_names in tokenizers_to_test:
-        if model_type in MODELS_TO_IGNORE:
-            continue
-        if model_type in MAX_TESTS:
-            tokenizer_names = tokenizer_names[:MAX_TESTS[model_type]]
-
-        custom_by_model_type_texts = TOKENIZER_TEST_DATA["custom_by_model_type"].get(
-            model_type, [])
-
-        print(f'Generating tests for {model_type}')
-        for tokenizer_name in tokenizer_names:
-            if tokenizer_name in TOKENIZERS_TO_IGNORE:
-                continue
-
-            print('  -', tokenizer_name)
-
-            try:
-                # Load tokenizer
-                if model_type == 'llama':
-                    # As of 17/12/2023, there are a few issues with the Llama tokenizers in transformers.
-                    # (1) Encoding with fast tokenizer adds whitespace after special tokens:
-                    #   - https://github.com/huggingface/transformers/issues/25881
-                    #   - https://github.com/huggingface/transformers/issues/26318
-                    #   - https://github.com/huggingface/transformers/issues/26455
-                    #   - https://github.com/huggingface/transformers/issues/27544
-                    # (2) Decoding with slow tokenizer adds whitespace after special tokens:
-                    #   - https://github.com/huggingface/transformers/issues/25073
-                    #
-                    # So for now, we mix and match the tokenizers:
-                    # i.e., use the fast tokenizer for encoding, and the slow tokenizer for decoding.
-                    # TODO: remove when the above issues are fixed:
-                    tokenizer = AutoTokenizer.from_pretrained(
-                        tokenizer_name,
-                        use_fast=False,
-                    )
-                    decoder_tokenizer = AutoTokenizer.from_pretrained(
-                        tokenizer_name,
-                        use_fast=True,
-                    )
-
-                else:
-                    decoder_tokenizer = tokenizer = AutoTokenizer.from_pretrained(
-                        tokenizer_name)
-
-            except (KeyError, EnvironmentError):
-                # If a KeyError/EnvironmentError is raised from the AutoTokenizer, it
-                # means the model does not use a tokenizer (e.g., vision models)
-                continue
-
-            try:
-                # Disable dropout, if the model allows it
-                tokenizer.backend_tokenizer.model.dropout = 0
-            except AttributeError:
-                pass
-
-            tokenizer_results = []
-
-            for data in TOKENIZER_TEXT_PAIR_TEST_DATA:
-                try:
-                    output = tokenizer(**data).data
-                except Exception:
-                    # Ignore testing tokenizers which fail in the python library
-                    continue
-                tokenizer_results.append(dict(
-                    input=data,
-                    output=output,
-                ))
-
-            shared_texts = TOKENIZER_TEST_DATA["shared"]
-            custom_texts = TOKENIZER_TEST_DATA["custom"].get(
-                tokenizer_name, [])
-
-            # Run tokenizer on test cases
-            for text in shared_texts + custom_texts + custom_by_model_type_texts:
-                try:
-                    encoded = tokenizer(text).data
-                except Exception:
-                    # Ignore testing tokenizers which fail in the python library
-                    continue
-
-                decoded_with_special = decoder_tokenizer.decode(
-                    encoded["input_ids"], skip_special_tokens=False)
-                decoded_without_special = decoder_tokenizer.decode(
-                    encoded["input_ids"], skip_special_tokens=True)
-
-                tokenizer_results.append(dict(
-                    input=text,
-                    encoded=encoded,
-                    decoded_with_special=decoded_with_special,
-                    decoded_without_special=decoded_without_special,
-                ))
-
-            if tokenizer_results:
-                tokenization_results[tokenizer_name] = tokenizer_results
-
-    template_results = {}
-
-    for tokenizer_id in TOKENIZERS_WITH_CHAT_TEMPLATES:
-        print(f'Generating chat templates for {tokenizer_id}')
-        tokenizer = AutoTokenizer.from_pretrained(
-            tokenizer_id,
-
-            # TODO: Remove once https://github.com/huggingface/transformers/pull/26678 is fixed
-            use_fast='llama' not in tokenizer_id,
-        )
-        tokenizer_results = []
-        for key in TOKENIZERS_WITH_CHAT_TEMPLATES[tokenizer_id]:
-            messages = CHAT_MESSAGES_EXAMPLES[key]
-
-            for add_generation_prompt, tokenize in product([True, False], [True, False]):
-                tokenizer_results.append(dict(
-                    messages=messages,
-                    add_generation_prompt=add_generation_prompt,
-                    tokenize=tokenize,
-                    target=tokenizer.apply_chat_template(
-                        messages,
-                        add_generation_prompt=add_generation_prompt,
-                        tokenize=tokenize,
-                    ),
-                ))
-
-        template_results[tokenizer_id] = tokenizer_results
-
-    return dict(
-        tokenization=tokenization_results,
-        templates=template_results,
-    )
-
-
-def generate_config_tests():
-    results = {}
-    for model_type, config_names in FLATTENED_SUPPORTED_MODELS:
-        print(f'Generating tests for {model_type}')
-
-        for config_name in config_names:
-            print('  -', config_name)
-            try:
-                # Load config
-                config = AutoConfig.from_pretrained(config_name)
-            except Exception:
-                # Something went wrong, skip this config
-                continue
-            results[config_name] = config.to_dict()
-
-            # TODO: Remove after https://github.com/huggingface/transformers/issues/23876 fixed
-            results[config_name].pop('torch_dtype', None)
-
-    return results
-
-
-ARRAY_SIZES = sorted(set([2 ** i for i in range(1, 10)])
-                     | set([3 ** i for i in range(1, 8)])
-                     | set([5 ** i for i in range(1, 6)])
-                     | set([7 ** i for i in range(1, 4)]))
-
-
-def serialize_complex_array(arr):
-    return [float(x) for y in arr for x in [y.real, y.imag]]
-
-
-def serialize_real_array(arr):
-    return arr.tolist()
-
-
-def generate_fft_tests():
-    np.random.seed(0)
-    tests = {}
-    for complex in [False, True]:
-        serialize_fn = serialize_complex_array if complex else serialize_real_array
-        for size in ARRAY_SIZES:
-            arr = np.random.randn(size).astype(
-                np.complex64 if complex else np.float64)
-            if complex:
-                arr += np.random.randn(size) * 1j
-            tests[f"fft_{size}_{'complex' if complex else 'real'}"] = {
-                "complex": complex,
-                "input": serialize_fn(arr),
-                "output": serialize_complex_array(np.fft.fft(arr)),
-            }
-    return tests
-
-
-def main():
-    # TODO add option to cache generated data + force build tests
-
-    data_dir = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "data",
-    )
-
-    tokenizer_tests = generate_tokenizer_tests()
-    with open(os.path.join(data_dir, "tokenizer_tests.json"), "w", encoding="utf-8") as fp:
-        json.dump(tokenizer_tests, fp)
-
-    config_tests = generate_config_tests()
-    with open(os.path.join(data_dir, "config_tests.json"), "w", encoding="utf-8") as fp:
-        json.dump(config_tests, fp)
-
-    fft_tests = generate_fft_tests()
-    with open(os.path.join(data_dir, "fft_tests.json"), "w", encoding="utf-8") as fp:
-        json.dump(fft_tests, fp)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/tests/generation.test.js b/tests/generation.test.js
deleted file mode 100644
index da50388aa..000000000
--- a/tests/generation.test.js
+++ /dev/null
@@ -1,173 +0,0 @@
-
-import { pipeline } from '../src/transformers.js';
-import { init, m, MAX_TEST_EXECUTION_TIME } from './init.js';
-
-// Initialise the testing environment
-init();
-
-describe('Generation parameters', () => {
-
-    // List all models which will be tested
-    const models = [
-        'MBZUAI/LaMini-Flan-T5-77M', // encoder-decoder
-        'MBZUAI/LaMini-GPT-124M', // decoder-only
-
-        'Xenova/llama2.c-stories15M', // decoder-only
-    ];
-
-    // encoder-decoder model
-    it(models[0], async () => {
-        const text = 'how can I become more healthy?';
-
-        const generator = await pipeline('text2text-generation', m(models[0]));
-
-        // default
-        // NOTE: Since `max_length` defaults to 20, this case also tests that.
-        {
-            const outputs = await generator(text);
-
-            const tokens = generator.tokenizer.encode(outputs[0].generated_text)
-            expect(tokens.length).toEqual(20);
-        }
-
-        // max_new_tokens
-        {
-            // NOTE: Without setting `min_new_tokens` (but setting `max_new_tokens`), 64 tokens are generated.
-            // So, the following tests are valid.
-            const MAX_NEW_TOKENS = 20;
-            const outputs = await generator(text, {
-                max_new_tokens: MAX_NEW_TOKENS,
-            });
-
-            const tokens = generator.tokenizer.encode(outputs[0].generated_text)
-            expect(tokens.length).toEqual(MAX_NEW_TOKENS + 1); // + 1 due to forced BOS token
-        }
-
-        // min_length
-        {
-            // NOTE: Without setting `min_length` (but setting `max_new_tokens`), 64 tokens are generated.
-            // So, the following tests are valid.
-            const MAX_NEW_TOKENS = 128;
-            const MIN_LENGTH = 65;
-            const outputs = await generator(text, {
-                max_new_tokens: MAX_NEW_TOKENS,
-                min_length: MIN_LENGTH,
-            });
-
-            const tokens = generator.tokenizer.encode(outputs[0].generated_text)
-            expect(tokens.length).toBeGreaterThanOrEqual(MIN_LENGTH);
-        }
-
-        // min_new_tokens
-        {
-            // NOTE: Without setting `min_new_tokens` (but setting `max_new_tokens`), 64 tokens are generated.
-            // So, the following tests are valid.
-            const MAX_NEW_TOKENS = 128;
-            const MIN_NEW_TOKENS = 65;
-            const outputs = await generator(text, {
-                max_new_tokens: MAX_NEW_TOKENS,
-                min_new_tokens: MIN_NEW_TOKENS,
-            });
-
-            const tokens = generator.tokenizer.encode(outputs[0].generated_text)
-            expect(tokens.length).toBeGreaterThanOrEqual(MIN_NEW_TOKENS);
-        }
-
-        await generator.dispose();
-
-    }, MAX_TEST_EXECUTION_TIME);
-
-    // decoder-only model
-    it(models[1], async () => {
-        const text = "### Instruction:\nTrue or False: The earth is flat?\n\n### Response: ";
-
-        const generator = await pipeline('text-generation', m(models[1]));
-
-        // default
-        // NOTE: Since `max_length` defaults to 20, this case also tests that.
-        {
-            const outputs = await generator(text);
-            const tokens = generator.tokenizer.encode(outputs[0].generated_text)
-            expect(tokens.length).toEqual(20);
-        }
-
-        // max_new_tokens
-        {
-            const MAX_NEW_TOKENS = 20;
-            const outputs = await generator(text, {
-                max_new_tokens: MAX_NEW_TOKENS,
-            });
-            const promptTokens = generator.tokenizer.encode(text)
-            const tokens = generator.tokenizer.encode(outputs[0].generated_text)
-            expect(tokens.length).toBeGreaterThan(promptTokens.length);
-        }
-
-        // min_length
-        {
-            // NOTE: Without setting `min_length` (but setting `max_new_tokens`), 22 tokens are generated.
-            // So, the following tests are valid.
-            const MAX_NEW_TOKENS = 10;
-            const MIN_LENGTH = 25;
-            const outputs = await generator(text, {
-                max_new_tokens: MAX_NEW_TOKENS,
-                min_length: MIN_LENGTH,
-            });
-
-            const tokens = generator.tokenizer.encode(outputs[0].generated_text)
-            expect(tokens.length).toBeGreaterThanOrEqual(MIN_LENGTH);
-        }
-
-        // min_new_tokens
-        {
-            // NOTE: Without setting `min_new_tokens` (but setting `max_new_tokens`), 22 tokens are generated.
-            // So, the following tests are valid.
-            const MAX_NEW_TOKENS = 32;
-            const MIN_NEW_TOKENS = 10;
-            const outputs = await generator(text, {
-                max_new_tokens: MAX_NEW_TOKENS,
-                min_new_tokens: MIN_NEW_TOKENS,
-            });
-
-            const tokens = generator.tokenizer.encode(outputs[0].generated_text)
-            const promptTokens = generator.tokenizer.encode(text)
-            expect(tokens.length).toBeGreaterThanOrEqual(promptTokens.length + MIN_NEW_TOKENS);
-        }
-
-        await generator.dispose();
-
-    }, MAX_TEST_EXECUTION_TIME);
-
-    // decoder-only model
-    it(models[2], async () => {
-        const MAX_NEW_TOKENS = 1;
-
-        const text = [
-            'Once upon a time,',
-            'Lily',
-            'Suddenly,',
-        ];
-
-        const generator = await pipeline('text-generation', m(models[2]));
-
-        { // return_full_text=false
-            const output = await generator(text, {
-                return_full_text: false,
-                max_new_tokens: MAX_NEW_TOKENS,
-                num_beams: 2,
-                num_return_sequences: 2,
-            });
-            const lengths = output.flatMap(
-                x => x.flatMap(
-                    y => generator.tokenizer.encode(y.generated_text.trim(), null, {
-                        add_special_tokens: false,
-                    }).length
-                )
-            ).every(x => x === MAX_NEW_TOKENS);
-
-            expect(lengths).toBe(true);
-        }
-        await generator.dispose();
-
-    }, MAX_TEST_EXECUTION_TIME);
-
-});
\ No newline at end of file
diff --git a/tests/hub.test.js b/tests/hub.test.js
deleted file mode 100644
index 76c0ad143..000000000
--- a/tests/hub.test.js
+++ /dev/null
@@ -1,35 +0,0 @@
-
-
-import { AutoModel, PreTrainedModel } from '../src/transformers.js';
-import { MAX_TEST_EXECUTION_TIME } from './init.js';
-
-// TODO: Set cache folder to a temp directory
-
-describe('Hub', () => {
-
-    describe('Loading models', () => {
-
-        it('should load a model from the local cache', async () => {
-            // 1. Local model exists (doesn't matter about status of remote file since local is tried first)
-            let model = await AutoModel.from_pretrained('t5-small');
-            expect(model).toBeInstanceOf(PreTrainedModel);
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it('should load a model from the remote cache', async () => {
-            // 2. Local model doesn't exist, remote file exists
-            // This tests that fallback functionality is working
-            let model = await AutoModel.from_pretrained('Xenova/t5-small');
-            expect(model).toBeInstanceOf(PreTrainedModel);
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it('should fail to load a model', async () => {
-            // 3. Local model doesn't exist, remote file doesn't exist
-            // This tests that error handling is working.
-            await expect(
-                AutoModel.from_pretrained('Xenova/this-model-does-not-exist')
-            ).rejects
-                .toBeInstanceOf(Error);
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-});
diff --git a/tests/init.js b/tests/init.js
index 2fcd8609e..65f079086 100644
--- a/tests/init.js
+++ b/tests/init.js
@@ -1,100 +1,64 @@
 // Helper functions used when initialising the testing environment.
 
-
 // Import Node typing utilities
 import * as types from "node:util/types";
 
 // Import onnxruntime-node's default backend
 import { onnxruntimeBackend } from "onnxruntime-node/dist/backend";
-import ONNX_COMMON from "onnxruntime-common";
+import * as ONNX_COMMON from "onnxruntime-common";
 
+/**
+ * A workaround to define a new backend for onnxruntime, which
+ * will not throw an error when running tests with jest.
+ * For more information, see: https://github.com/jestjs/jest/issues/11864#issuecomment-1261468011
+ */
 export function init() {
-    // In rare cases (specifically when running unit tests with GitHub actions), possibly due to
-    // a large number of concurrent executions, onnxruntime might fallback to use the WASM backend.
-    // In this case, we set the number of threads to 1 to avoid errors like:
-    //  - `TypeError: The worker script or module filename must be an absolute path or a relative path starting with './' or '../'. Received "blob:nodedata:..."`
-    ONNX_COMMON.env.wasm.numThreads = 1;
-
-    // A workaround to define a new backend for onnxruntime, which
-    // will not throw an error when running tests with jest.
-    // For more information, see: https://github.com/jestjs/jest/issues/11864#issuecomment-1261468011
-
-    let registerBackend = ONNX_COMMON.registerBackend;
-
-    // Define the constructors to monkey-patch
-    const TYPED_ARRAYS_CONSTRUCTOR_NAMES = [
-        "Int8Array",
-        "Int16Array",
-        "Int32Array",
-        "BigInt64Array",
-        "Uint8Array",
-        "Uint8ClampedArray",
-        "Uint16Array",
-        "Uint32Array",
-        "BigUint64Array",
-        "Float32Array",
-        "Float64Array",
-    ];
-
-    // Keep a reference to the original initialization method
-    const originalMethod = onnxruntimeBackend.init;
-
-    // Monkey-patch the initialization function
-    onnxruntimeBackend.init = function (...args) {
-        // There is probably a better way to do this
-        Array.isArray = x =>
-            typeof x === "object" &&
-            x !== null &&
-            typeof x.length === "number" &&
-            x?.constructor.toString() === Array.toString();
-
-        // For each typed array constructor
-        for (const ctorName of TYPED_ARRAYS_CONSTRUCTOR_NAMES) {
-            // Get the constructor from the current context
-            const ctor = global[ctorName];
-
-            // Get the corresponding test function from the `util` module
-            const value = types[`is${ctorName}`].bind(types);
-
-            // Monkey-patch the constructor so "x instanceof ctor" returns "types[`is${ctorName}`](x)"
-            Object.defineProperty(ctor, Symbol.hasInstance, {
-                value,
-                writable: false,
-                configurable: false,
-                enumerable: false,
-            });
-        }
-
-        // Call the original method
-        return originalMethod.apply(this, args);
-    };
+  // In rare cases (specifically when running unit tests with GitHub actions), possibly due to
+  // a large number of concurrent executions, onnxruntime might fallback to use the WASM backend.
+  // In this case, we set the number of threads to 1 to avoid errors like:
+  //  - `TypeError: The worker script or module filename must be an absolute path or a relative path starting with './' or '../'. Received "blob:nodedata:..."`
+  ONNX_COMMON.env.wasm.numThreads = 1;
+
+  let registerBackend = ONNX_COMMON.registerBackend;
+
+  // Define the constructors to monkey-patch
+  const TYPED_ARRAYS_CONSTRUCTOR_NAMES = ["Int8Array", "Int16Array", "Int32Array", "BigInt64Array", "Uint8Array", "Uint8ClampedArray", "Uint16Array", "Uint32Array", "BigUint64Array", "Float32Array", "Float64Array"];
+
+  // Keep a reference to the original initialization method
+  const originalMethod = onnxruntimeBackend.init;
+
+  // Monkey-patch the initialization function
+  onnxruntimeBackend.init = function (...args) {
+    // There is probably a better way to do this
+    Array.isArray = (x) => typeof x === "object" && x !== null && typeof x.length === "number" && x?.constructor.toString() === Array.toString();
+
+    // For each typed array constructor
+    for (const ctorName of TYPED_ARRAYS_CONSTRUCTOR_NAMES) {
+      // Get the constructor from the current context
+      const ctor = globalThis[ctorName];
+
+      // Get the corresponding test function from the `util` module
+      const value = types[`is${ctorName}`].bind(types);
+
+      // Monkey-patch the constructor so "x instanceof ctor" returns "types[`is${ctorName}`](x)"
+      Object.defineProperty(ctor, Symbol.hasInstance, {
+        value,
+        writable: true, // writable=true is necessary to overwrite the default implementation (and allow subsequent overwrites)
+        configurable: false,
+        enumerable: false,
+      });
+    }
 
-    // Register the backend with the highest priority, so it is used instead of the default one
-    registerBackend("test", onnxruntimeBackend, Number.POSITIVE_INFINITY);
+    // Call the original method
+    return originalMethod.apply(this, args);
+  };
 
+  // Register the backend with the highest priority, so it is used instead of the default one
+  registerBackend("test", onnxruntimeBackend, Number.POSITIVE_INFINITY);
 }
 
+export const MAX_MODEL_LOAD_TIME = 10_000; // 10 seconds
+export const MAX_TEST_EXECUTION_TIME = 30_000; // 30 seconds
+export const MAX_MODEL_DISPOSE_TIME = 1_000; // 1 second
 
-export let m = x => x;
-if (process.env.TESTING_REMOTELY) {
-    // Running in a remote environment where models are not present locally (e.g., GitHub actions).
-
-    // In this case, we use the "test" models, under the following org/username:
-    const TEST_USERNAME = 'Xenova';
-
-    m = (name) => {
-        // Split into parts: [username, model]
-        let parts = name.split(/\/+/, 2);
-        if (parts.length === 2) {
-            // Replace username
-            parts[0] = TEST_USERNAME;
-        } else {
-            // Add username
-            parts.unshift(TEST_USERNAME);
-        }
-
-        return parts.join('/');
-    }
-}
-
-export const MAX_TEST_EXECUTION_TIME = 60_000; // 60 seconds
+export const MAX_TEST_TIME = MAX_MODEL_LOAD_TIME + MAX_TEST_EXECUTION_TIME + MAX_MODEL_DISPOSE_TIME;
diff --git a/tests/maths.test.js b/tests/maths.test.js
deleted file mode 100644
index 3c00cfa26..000000000
--- a/tests/maths.test.js
+++ /dev/null
@@ -1,156 +0,0 @@
-
-import { compare } from './test_utils.js';
-
-import { getFile } from '../src/utils/hub.js';
-import { FFT, medianFilter, bankers_round, log_softmax } from '../src/utils/maths.js';
-
-
-const fft = (arr, complex = false) => {
-    let output;
-    let fft;
-    if (complex) {
-        fft = new FFT(arr.length / 2);
-        output = new Float64Array(fft.outputBufferSize);
-        fft.transform(output, arr);
-    } else {
-        fft = new FFT(arr.length);
-        output = new Float64Array(fft.outputBufferSize);
-        fft.realTransform(output, arr);
-    }
-    if (!fft.isPowerOfTwo) {
-        output = output.slice(0, complex ? arr.length : 2 * arr.length);
-    }
-    return output;
-}
-
-const fftTestsData = await (await getFile('./tests/data/fft_tests.json')).json()
-
-describe('Mathematical operations', () => {
-
-    describe('bankers rounding', () => {
-        it('should round up to nearest even', () => {
-            expect(bankers_round(-0.5)).toBeCloseTo(0);
-            expect(bankers_round(1.5)).toBeCloseTo(2);
-            expect(bankers_round(19.5)).toBeCloseTo(20);
-        });
-        it('should round down to nearest even', () => {
-            expect(bankers_round(-1.5)).toBeCloseTo(-2);
-            expect(bankers_round(2.5)).toBeCloseTo(2);
-            expect(bankers_round(18.5)).toBeCloseTo(18);
-        });
-    });
-
-    describe('median filtering', () => {
-
-
-        it('should compute median filter', async () => {
-            const t1 = new Float32Array([5, 12, 2, 6, 3, 10, 9, 1, 4, 8, 11, 7]);
-            const window = 3;
-
-            const target = new Float32Array([12, 5, 6, 3, 6, 9, 9, 4, 4, 8, 8, 11]);
-
-            const output = medianFilter(t1, window);
-            compare(output, target, 1e-3);
-        });
-
-
-        // TODO add tests for errors
-    });
-
-    describe('FFT', () => {
-        // Should match output of numpy fft
-        it('should compute real FFT for power of two', () => {
-            { // size = 4
-                // np.fft.fft([1,2,3,4]) == array([10.+0.j, -2.+2.j, -2.+0.j, -2.-2.j])
-                const input = new Float32Array([1, 2, 3, 4]);
-                const target = new Float32Array([10, 0, -2, 2, -2, 0, -2, -2]);
-
-                const output = fft(input);
-                compare(output, target, 1e-3);
-            }
-
-            { // size = 16
-                // np.fft.fft([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
-                // == array([136. +0.j        ,  -8.+40.21871594j,  -8.+19.3137085j ,
-                //            -8.+11.9728461j ,  -8. +8.j        ,  -8. +5.3454291j ,
-                //            -8. +3.3137085j ,  -8. +1.59129894j,  -8. +0.j        ,
-                //            -8. -1.59129894j,  -8. -3.3137085j ,  -8. -5.3454291j ,
-                //            -8. -8.j        ,  -8.-11.9728461j ,  -8.-19.3137085j ,
-                //            -8.-40.21871594j])
-                const input = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
-                const target = new Float32Array([136.0, 0.0, -8.0, 40.218715937006785, -8.0, 19.31370849898476, -8.0, 11.972846101323912, -8.0, 8.0, -8.0, 5.345429103354389, -8.0, 3.313708498984761, -8.0, 1.5912989390372658, -8.0, 0.0, -8.0, -1.5912989390372658, -8.0, -3.313708498984761, -8.0, -5.345429103354389, -8.0, -8.0, -8.0, -11.972846101323912, -8.0, -19.31370849898476, -8.0, -40.218715937006785]);
-
-                const output = fft(input);
-                compare(output, target, 1e-3);
-            }
-        });
-
-        it('should compute real FFT for non-power of two', () => {
-            { // size = 3
-                // np.fft.fft([1,2,3]) == array([ 6. +0.j, -1.5+0.8660254j, -1.5-0.8660254j])
-                const input = new Float32Array([1, 2, 3]);
-                const target = new Float32Array([6, 0, -1.5, 0.8660254, -1.5, -0.8660254]);
-
-                const output = fft(input);
-                compare(output, target, 1e-3);
-            }
-        });
-
-        it('should compute complex FFT for non-power of two', () => {
-            { // size = 3
-                // np.fft.fft([1+3j,2-2j,3+1j]) == array([ 6. +2.j, -4.09807621+4.3660254j, 1.09807621+2.6339746j])
-                const input = new Float32Array([1, 3, 2, -2, 3, 1]);
-                const target = new Float32Array([6, 2, -4.09807621, 4.3660254, 1.09807621, 2.6339746]);
-
-                const output = fft(input, true);
-                compare(output, target, 1e-3);
-            }
-        });
-
-        it('should compute complex FFT for power of two', () => {
-            { // size = 4
-                // np.fft.fft([1+4j, 2-3j,3+2j, 4-1j]) == array([10. +2.j, -4. +4.j, -2.+10.j,  0. +0.j])
-                const input = new Float32Array([1, 4, 2, -3, 3, 2, 4, -1]);
-                const target = new Float32Array([10, 2, -4, 4, -2, 10, 0, 0]);
-
-                const output = fft(input, true);
-                compare(output, target, 1e-3);
-            }
-        });
-    })
-
-    describe('FFT (dynamic)', () => {
-        // Should match output of numpy fft
-        for (const [name, test] of Object.entries(fftTestsData)) {
-            // if (test.input.length > 5) continue;
-            it(name, () => {
-                const output = fft(test.input, test.complex);
-
-                if (output.map((v, i) => Math.abs(v - test.output[i])).some(v => v > 1e-4)) {
-                    console.log('input', test.input)
-                    console.log('output', output)
-                    console.log('target', test.output)
-                }
-                compare(output, test.output, 1e-4);
-
-            });
-        }
-    });
-
-    describe('log softmax', () => {
-        // Should match output of scipy log_softmax
-        it('should compute log softmax correctly for usual values', () => {
-            const input = [0, 1, 2, 3];
-            const expected = [-3.4401896985611953, -2.4401896985611953, -1.4401896985611953, -0.44018969856119533];
-            const output = log_softmax(input);
-            compare(output, expected, 1e-13);
-        });
-
-        it('should compute log softmax correctly for values with large differences', () => {
-            const input = [1000, 1];
-            const expected = [0, -999];
-            const output = log_softmax(input);
-            compare(output, expected, 1e-13);
-        });
-    });
-});
diff --git a/tests/models.test.js b/tests/models.test.js
index 126e10e1d..f1bc7961c 100644
--- a/tests/models.test.js
+++ b/tests/models.test.js
@@ -2,147 +2,129 @@
  * Test that models loaded outside of the `pipeline` function work correctly (e.g., `AutoModel.from_pretrained(...)`);
  */
 
-import {
-    AutoTokenizer,
-    AutoModel,
-    AutoProcessor,
+import { AutoTokenizer, AutoModel, AutoProcessor, BertModel, GPT2Model, T5ForConditionalGeneration, CLIPTextModelWithProjection, CLIPVisionModelWithProjection, BertTokenizer, GPT2Tokenizer, T5Tokenizer, RawImage } from "../src/transformers.js";
 
-    BertModel,
-    GPT2Model,
-    T5Model,
-    CLIPTextModelWithProjection,
-    CLIPVisionModelWithProjection,
+import { init, MAX_TEST_EXECUTION_TIME } from "./init.js";
 
-    BertTokenizer,
-    GPT2Tokenizer,
-    T5Tokenizer,
-
-    RawImage,
-} from '../src/transformers.js';
-
-import { init, m, MAX_TEST_EXECUTION_TIME } from './init.js';
-
-import { compare } from './test_utils.js';
+import { compare } from "./test_utils.js";
 
 // Initialise the testing environment
 init();
 
-describe('Models', () => {
-
-    describe('Loading different architecture types', () => {
-
-        // List all models which will be tested
-        const models_to_test = [
-            // [name, modelClass, tokenizerClass]
-            ['bert-base-uncased', BertModel, BertTokenizer], // Encoder-only
-            ['gpt2', GPT2Model, GPT2Tokenizer],              // Decoder-only
-            ['t5-small', T5Model, T5Tokenizer],              // Encoder-decoder
-        ];
-
-        let texts = [
-            'Once upon a time',
-            'I like to eat apples',
-        ];
-
-        for (let [name, modelClass, tokenizerClass] of models_to_test) {
-
-            // Test that both the auto model and the specific model work
-            let tokenizers = [AutoTokenizer, tokenizerClass];
-            let models = [AutoModel, modelClass];
-
-            for (let i = 0; i < tokenizers.length; ++i) {
-                const tokenizerClassToTest = tokenizers[i];
-                const modelClassToTest = models[i];
-
-                it(`${name} (${modelClassToTest.name})`, async () => {
-                    const model_id = m(name);
-
-                    // Load model and tokenizer
-                    let tokenizer = await tokenizerClassToTest.from_pretrained(model_id);
-                    let model = await modelClassToTest.from_pretrained(model_id);
-
-                    let tests = [
-                        texts[0], // single
-                        texts,    // batched
-                    ]
-                    for (let test of tests) {
-                        let encodings = await tokenizer(test, { truncation: true, padding: true });
-                        let output = await model(encodings);
-
-                        if (output.logits) {
-                            // Ensure correct shapes
-                            let expected_shape = [...encodings.input_ids.dims, model.config.vocab_size];
-                            let actual_shape = output.logits.dims;
-                            compare(expected_shape, actual_shape);
-                        } else if (output.last_hidden_state) {
-                            let expected_shape = [...encodings.input_ids.dims, model.config.d_model];
-                            let actual_shape = output.last_hidden_state.dims;
-                            compare(expected_shape, actual_shape);
-                        } else {
-                            console.warn('Unexpected output', output);
-                            throw new Error('Unexpected output');
-                        }
-
-                    }
-
-                    await model.dispose();
-
-                }, MAX_TEST_EXECUTION_TIME);
-
+describe("Models", () => {
+  describe("Loading different architecture types", () => {
+    // List all models which will be tested
+    const models_to_test = [
+      // [name, modelClass, tokenizerClass]
+      ["hf-internal-testing/tiny-random-BertForMaskedLM", BertModel, BertTokenizer], // Encoder-only
+      ["hf-internal-testing/tiny-random-GPT2LMHeadModel", GPT2Model, GPT2Tokenizer], // Decoder-only
+      ["hf-internal-testing/tiny-random-T5ForConditionalGeneration", T5ForConditionalGeneration, T5Tokenizer], // Encoder-decoder
+    ];
+
+    const texts = ["Once upon a time", "I like to eat apples"];
+
+    for (const [model_id, modelClass, tokenizerClass] of models_to_test) {
+      // Test that both the auto model and the specific model work
+      const tokenizers = [AutoTokenizer, tokenizerClass];
+      const models = [AutoModel, modelClass];
+
+      for (let i = 0; i < tokenizers.length; ++i) {
+        const tokenizerClassToTest = tokenizers[i];
+        const modelClassToTest = models[i];
+
+        it(
+          `${model_id} (${modelClassToTest.name})`,
+          async () => {
+            // Load model and tokenizer
+            const tokenizer = await tokenizerClassToTest.from_pretrained(model_id);
+            const model = await modelClassToTest.from_pretrained(model_id);
+
+            const tests = [
+              texts[0], // single
+              texts, // batched
+            ];
+            for (const test of tests) {
+              const inputs = await tokenizer(test, { truncation: true, padding: true });
+              if (model.config.is_encoder_decoder) {
+                inputs.decoder_input_ids = inputs.input_ids;
+              }
+              const output = await model(inputs);
+
+              if (output.logits) {
+                // Ensure correct shapes
+                const expected_shape = [...inputs.input_ids.dims, model.config.vocab_size];
+                const actual_shape = output.logits.dims;
+                compare(expected_shape, actual_shape);
+              } else if (output.last_hidden_state) {
+                const expected_shape = [...inputs.input_ids.dims, model.config.d_model];
+                const actual_shape = output.last_hidden_state.dims;
+                compare(expected_shape, actual_shape);
+              } else {
+                console.warn("Unexpected output", output);
+                throw new Error("Unexpected output");
+              }
             }
-        }
-
-    });
-
-    describe('Running specific models', () => {
-        const models_to_test = [
-            'openai/clip-vit-base-patch16',
-        ];
-        it(`CLIP (text)`, async () => {
-            const model_id = m(models_to_test[0]);
-
-            // Load tokenizer and text model
-            const tokenizer = await AutoTokenizer.from_pretrained(model_id);
-            const text_model = await CLIPTextModelWithProjection.from_pretrained(model_id);
-
-            // Run tokenization
-            const texts = ['a photo of a car', 'a photo of a football match'];
-            const text_inputs = tokenizer(texts, { padding: true, truncation: true });
-
-            // Compute embeddings
-            const { text_embeds } = await text_model(text_inputs);
-
-            // Ensure correct shapes
-            const expected_shape = [texts.length, text_model.config.projection_dim];
-            const actual_shape = text_embeds.dims;
-            compare(expected_shape, actual_shape);
-
-            await text_model.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it(`CLIP (vision)`, async () => {
-            const model_id = m(models_to_test[0]);
-
-            // Load processor and vision model
-            const processor = await AutoProcessor.from_pretrained(model_id);
-            const vision_model = await CLIPVisionModelWithProjection.from_pretrained(model_id);
-
-            // Read image and run processor
-            const image = await RawImage.read('https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg');
-            const image_inputs = await processor(image);
-
-            // Compute embeddings
-            const { image_embeds } = await vision_model(image_inputs);
-
-            // Ensure correct shapes
-            const expected_shape = [1, vision_model.config.projection_dim];
-            const actual_shape = image_embeds.dims;
-            compare(expected_shape, actual_shape);
-
-            await vision_model.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
 
-    });
+            await model.dispose();
+          },
+          MAX_TEST_EXECUTION_TIME,
+        );
+      }
+    }
+  });
+
+  describe("Running specific models", () => {
+    const models_to_test = ["hf-internal-testing/tiny-random-CLIPModel"];
+    it(
+      `CLIP (text)`,
+      async () => {
+        const model_id = models_to_test[0];
+
+        // Load tokenizer and text model
+        const tokenizer = await AutoTokenizer.from_pretrained(model_id);
+        const text_model = await CLIPTextModelWithProjection.from_pretrained(model_id, { revision: "refs/pr/5" });
+
+        // Run tokenization
+        const texts = ["a photo of a car", "a photo of a football match"];
+        const text_inputs = tokenizer(texts, { padding: true, truncation: true });
+
+        // Compute embeddings
+        const { text_embeds } = await text_model(text_inputs);
+
+        // Ensure correct shapes
+        const expected_shape = [texts.length, text_model.config.projection_dim];
+        const actual_shape = text_embeds.dims;
+        compare(expected_shape, actual_shape);
+
+        await text_model.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      `CLIP (vision)`,
+      async () => {
+        const model_id = models_to_test[0];
+
+        // Load processor and vision model
+        const processor = await AutoProcessor.from_pretrained(model_id);
+        const vision_model = await CLIPVisionModelWithProjection.from_pretrained(model_id, { revision: "refs/pr/5" });
+
+        // Read image and run processor
+        const image = await RawImage.read("https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg");
+        const image_inputs = await processor(image);
+
+        // Compute embeddings
+        const { image_embeds } = await vision_model(image_inputs);
+
+        // Ensure correct shapes
+        const expected_shape = [1, vision_model.config.projection_dim];
+        const actual_shape = image_embeds.dims;
+        compare(expected_shape, actual_shape);
+
+        await vision_model.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
 });
diff --git a/tests/models/albert/tokenization.js b/tests/models/albert/tokenization.js
new file mode 100644
index 000000000..875bc418f
--- /dev/null
+++ b/tests/models/albert/tokenization.js
@@ -0,0 +1,183 @@
+import { AlbertTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, BERT_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = AlbertTokenizer;
+export const TEST_CONFIG = {
+  // - uses `StripAccents` normalizer
+  "Xenova/albert-base-v2": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["\u2581how", "\u2581are", "\u2581you", "\u2581doing", "?"],
+      ids: [2, 184, 50, 42, 845, 60, 3],
+      decoded: "[CLS] how are you doing?[SEP]",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["\u2581you", "\u2581should", "'", "ve", "\u2581done", "\u2581this"],
+      ids: [2, 42, 378, 22, 195, 677, 48, 3],
+      decoded: "[CLS] you should've done this[SEP]",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["\u25810", "12", "345", "67", "89", "\u25810", "\u25811", "\u25812", "\u25813", "\u25814", "\u25815", "\u25816", "\u25817", "\u25818", "\u25819", "\u258110", "\u2581100", "\u25811000"],
+      ids: [2, 713, 918, 21997, 4167, 3877, 713, 137, 172, 203, 268, 331, 400, 453, 469, 561, 332, 808, 6150, 3],
+      decoded: "[CLS] 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000[SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u2581the", "\u2581company", "\u2581was", "\u2581founded", "\u2581in", "\u25812016", "."],
+      ids: [2, 14, 237, 23, 785, 19, 690, 9, 3],
+      decoded: "[CLS] the company was founded in 2016.[SEP]",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581a", "\u2581", "'", "ll", "\u2581", "!!", "to", "?'", "d", '"', "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [2, 21, 13, 22, 211, 13, 19015, 262, 5663, 43, 7, 43, 16, 15, 92, 22, 38, 9, 3],
+      decoded: "[CLS] a 'll!!to?'d\"d of, can't.[SEP]",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u2581def", "\u2581main", "(", ")", ":", "\u2581pass"],
+      ids: [2, 6312, 407, 5, 6, 45, 1477, 3],
+      decoded: "[CLS] def main(): pass[SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u2581let", "\u2581a", "\u2581=", "\u2581ob", "j", ".", "to", "string", "(", ")", ";", "\u2581to", "string", "(", ")", ";"],
+      ids: [2, 408, 21, 800, 5122, 728, 9, 262, 11130, 5, 6, 73, 20, 11130, 5, 6, 73, 3],
+      decoded: "[CLS] let a = obj.tostring(); tostring();[SEP]",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["\u2581this", "\u2581is", "\u2581a", "\u2581test", "."],
+      ids: [2, 48, 25, 21, 1289, 9, 3],
+      decoded: "[CLS] this is a test.[SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u2581unwanted", ",", "running"],
+      ids: [2, 21095, 15, 11325, 3],
+      decoded: "[CLS] unwanted,running[SEP]",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["\u25811", "\u0000", "2", "\u25813"],
+      ids: [2, 137, 1, 135, 203, 3],
+      decoded: "[CLS] 1<unk>2 3[SEP]",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["\u2581hello", "\u2581world"],
+      ids: [2, 10975, 126, 3],
+      decoded: "[CLS] hello world[SEP]",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["\u2581hello", "\u2581world"],
+      ids: [2, 10975, 126, 3],
+      decoded: "[CLS] hello world[SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581", "\u751f\u6d3b\u7684\u771f\u8c1b\u662f"],
+      ids: [2, 13, 1, 3],
+      decoded: "[CLS] <unk>[SEP]",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581leading", "\u2581space"],
+      ids: [2, 1005, 726, 3],
+      decoded: "[CLS] leading space[SEP]",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trailing", "\u2581space"],
+      ids: [2, 14323, 726, 3],
+      decoded: "[CLS] trailing space[SEP]",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["\u2581hi", "\u2581hello"],
+      ids: [2, 4148, 10975, 3],
+      decoded: "[CLS] hi hello[SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$1", "\u2581r", "2", "\u2581#3", "\u2581", "\u20ac", "4", "\u2581", "\u00a3", "5", "\u2581", "\u00a5", "6", "\u2581", "\u20a3", "7", "\u2581", "\u20b9", "8", "\u2581", "\u20b1", "9", "\u2581test"],
+      ids: [2, 1289, 3742, 761, 135, 11489, 13, 12, 300, 13, 11, 264, 13, 1, 379, 13, 1, 465, 13, 1, 457, 13, 1, 518, 1289, 3],
+      decoded: "[CLS] test $1 r2 #3 \u20ac4 \u00a35 <unk>6 <unk>7 <unk>8 <unk>9 test[SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581i", "\u2581bought", "\u2581an", "\u2581apple", "\u2581for", "\u2581$1", ".", "00", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [2, 31, 2448, 40, 4037, 26, 3742, 9, 2032, 35, 14, 1718, 9, 3],
+      decoded: "[CLS] i bought an apple for $1.00 at the store.[SEP]",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", ".", ".", "."],
+      ids: [2, 42, 9, 9, 9, 3],
+      decoded: "[CLS] you...[SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", ".", ".", "."],
+      ids: [2, 42, 9, 9, 9, 3],
+      decoded: "[CLS] you...[SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", ".", ".", ".", "\u2581you", ".", ".", "."],
+      ids: [2, 42, 9, 9, 9, 42, 9, 9, 9, 3],
+      decoded: "[CLS] you... you...[SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581weird", "\u2581", "~", "\u2581edge", "\u2581", "~", "\u2581case"],
+      ids: [2, 5455, 13, 1, 1407, 13, 1, 610, 3],
+      decoded: "[CLS] weird <unk> edge <unk> case[SEP]",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581this", "\u2581is", "\u2581a", "\u2581test", "\u2581", "."],
+      ids: [2, 48, 25, 21, 1289, 13, 9, 3],
+      decoded: "[CLS] this is a test.[SEP]",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581", "\ud83d\ude02", "\u2581", "\ud83d\udc4d", "\u2581", "\ud83e\udd23", "\u2581", "\ud83d\ude0d", "\u2581", "\ud83d\ude2d", "\u2581", "\ud83c\udf89", "\u2581", "\ud83d\ude4f", "\u2581", "\ud83d\ude0a", "\u2581", "\ud83d\udd25", "\u2581", "\ud83d\ude01", "\u2581", "\ud83d\ude05", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\ude06", "\u2581", "\ud83d\udc4f", "\u2581", "\u2764", "\u2581", "\ud83d\udc9c", "\u2581", "\ud83d\udc9a", "\u2581", "\ud83d\udc97", "\u2581", "\ud83d\udc99", "\u2581", "\ud83d\udda4", "\u2581", "\ud83d\ude0e", "\u2581", "\ud83d\udc4c", "\u2581", "\ud83e\udd73", "\u2581", "\ud83d\udcaa", "\u2581", "\u2728", "\u2581", "\ud83d\udc49", "\u2581", "\ud83d\udc40", "\u2581", "\ud83d\udcaf", "\u2581", "\ud83c\udf88", "\u2581", "\ud83d\ude48", "\u2581", "\ud83d\ude4c", "\u2581", "\ud83d\udc80", "\u2581", "\ud83d\udc47", "\u2581", "\ud83d\udc4b", "\u2581", "\u2705", "\u2581", "\ud83c\udf81", "\u2581", "\ud83c\udf1e", "\u2581", "\ud83c\udf38", "\u2581", "\ud83d\udcb0"],
+      ids: [2, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 3],
+      decoded: "[CLS] <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>[SEP]",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "\u2728", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\udc41", "\u2581", "\ud83d\udc71\ud83c\udffb", "\u2581", "\ud83d\udd75", "\u2581", "\u2642", "\u2581", "\ud83e\uddd9\ud83c\udffb", "\u2581", "\u2642", "\u2581", "\ud83d\udc68\ud83c\udffb", "\u2581", "\ud83c\udf3e", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83d\udc69", "\u2581", "\u2764", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc67", "\u2581", "\ud83d\udc66", "\u2581", "\ud83e\uddd1\ud83c\udffb", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1\ud83c\udffb", "\u2581", "\ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f", "\u2581", "\ud83d\udc68\ud83c\udffb", "\u2581", "\u2764", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68\ud83c\udffc"],
+      ids: [2, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 13, 1, 3],
+      decoded: "[CLS] <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>[SEP]",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["\u2581", "ah", "\u535a\u63a8", "zz"],
+      ids: [2, 13, 1307, 1, 5092, 3],
+      decoded: "[CLS] ah<unk>zz[SEP]",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["\u2581hello"],
+      ids: [2, 10975, 3],
+      decoded: "[CLS] hello[SEP]",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["\u2581hello", "!", "how", "\u2581are", "\u2581you", "?"],
+      ids: [2, 10975, 187, 1544, 50, 42, 60, 3],
+      decoded: "[CLS] hello!how are you?[SEP]",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["\u2581hall", "o", "!", "how", "\u2581are", "\u2581you", "?"],
+      ids: [2, 554, 111, 187, 1544, 50, 42, 60, 3],
+      decoded: "[CLS] hallo!how are you?[SEP]",
+    },
+  },
+};
diff --git a/tests/models/all_tokenization_tests.js b/tests/models/all_tokenization_tests.js
new file mode 100644
index 000000000..00ec6d639
--- /dev/null
+++ b/tests/models/all_tokenization_tests.js
@@ -0,0 +1,22 @@
+export * as AlbertTokenizer from "./albert/tokenization.js";
+export * as BertTokenizer from "./bert/tokenization.js";
+export * as BlenderbotSmallTokenizer from "./blenderbot_small/tokenization.js";
+export * as BloomTokenizer from "./bloom/tokenization.js";
+export * as CLIPTokenizer from "./clip/tokenization.js";
+export * as DebertaV2Tokenizer from "./deberta-v2/tokenization.js";
+export * as DistilBertTokenizer from "./distilbert/tokenization.js";
+export * as EsmTokenizer from "./esm/tokenization.js";
+export * as FalconTokenizer from "./falcon/tokenization.js";
+export * as GPT2Tokenizer from "./gpt2/tokenization.js";
+export * as GemmaTokenizer from "./gemma/tokenization.js";
+export * as LlamaTokenizer from "./llama/tokenization.js";
+export * as M2M100Tokenizer from "./m2m_100/tokenization.js";
+export * as MPNetTokenizer from "./mpnet/tokenization.js";
+export * as NllbTokenizer from "./nllb/tokenization.js";
+export * as Qwen2Tokenizer from "./qwen2/tokenization.js";
+export * as RobertaTokenizer from "./roberta/tokenization.js";
+export * as T5Tokenizer from "./t5/tokenization.js";
+export * as VitsTokenizer from "./vits/tokenization.js";
+export * as Wav2Vec2CTCTokenizer from "./wav2vec2/tokenization.js";
+export * as WhisperTokenizer from "./whisper/tokenization.js";
+export * as XLMRobertaTokenizer from "./xlm-roberta/tokenization.js";
diff --git a/tests/models/bert/tokenization.js b/tests/models/bert/tokenization.js
new file mode 100644
index 000000000..54b253260
--- /dev/null
+++ b/tests/models/bert/tokenization.js
@@ -0,0 +1,1335 @@
+import { BertTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, BERT_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = BertTokenizer;
+export const TEST_CONFIG = {
+  "Xenova/bert-base-uncased": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["how", "are", "you", "doing", "?"],
+      ids: [101, 2129, 2024, 2017, 2725, 1029, 102],
+      decoded: "[CLS] how are you doing? [SEP]",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["you", "should", "'", "ve", "done", "this"],
+      ids: [101, 2017, 2323, 1005, 2310, 2589, 2023, 102],
+      decoded: "[CLS] you should've done this [SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["the", "company", "was", "founded", "in", "2016", "."],
+      ids: [101, 1996, 2194, 2001, 2631, 1999, 2355, 1012, 102],
+      decoded: "[CLS] the company was founded in 2016. [SEP]",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["a", "'", "ll", "!", "!", "to", "?", "'", "d", "'", "'", "d", "of", ",", "can", "'", "t", "."],
+      ids: [101, 1037, 1005, 2222, 999, 999, 2000, 1029, 1005, 1040, 1005, 1005, 1040, 1997, 1010, 2064, 1005, 1056, 1012, 102],
+      decoded: "[CLS] a'll!! to?'d'' d of, can't. [SEP]",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "main", "(", ")", ":", "pass"],
+      ids: [101, 13366, 2364, 1006, 1007, 1024, 3413, 102],
+      decoded: "[CLS] def main ( ) : pass [SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "ob", "##j", ".", "to", "##st", "##ring", "(", ")", ";", "to", "##st", "##ring", "(", ")", ";"],
+      ids: [101, 2292, 1037, 1027, 27885, 3501, 1012, 2000, 3367, 4892, 1006, 1007, 1025, 2000, 3367, 4892, 1006, 1007, 1025, 102],
+      decoded: "[CLS] let a = obj. tostring ( ) ; tostring ( ) ; [SEP]",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["this", "is", "a", "test", "."],
+      ids: [101, 2023, 2003, 1037, 3231, 1012, 102],
+      decoded: "[CLS] this is a test. [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["unwanted", ",", "running"],
+      ids: [101, 18162, 1010, 2770, 102],
+      decoded: "[CLS] unwanted, running [SEP]",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["123"],
+      ids: [101, 13138, 102],
+      decoded: "[CLS] 123 [SEP]",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["hello", "world"],
+      ids: [101, 7592, 2088, 102],
+      decoded: "[CLS] hello world [SEP]",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "world"],
+      ids: [101, 7592, 2088, 102],
+      decoded: "[CLS] hello world [SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "[UNK]", "\u7684", "\u771f", "[UNK]", "[UNK]"],
+      ids: [101, 1910, 100, 1916, 1921, 100, 100, 102],
+      decoded: "[CLS] \u751f [UNK] \u7684 \u771f [UNK] [UNK] [SEP]",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["leading", "space"],
+      ids: [101, 2877, 2686, 102],
+      decoded: "[CLS] leading space [SEP]",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trailing", "space"],
+      ids: [101, 12542, 2686, 102],
+      decoded: "[CLS] trailing space [SEP]",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["hi", "hello"],
+      ids: [101, 7632, 7592, 102],
+      decoded: "[CLS] hi hello [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r", "##2", "#", "3", "\u20ac", "##4", "\u00a35", "\u00a5", "##6", "[UNK]", "\u20b9", "##8", "\u20b1", "##9", "test"],
+      ids: [101, 3231, 1002, 1015, 1054, 2475, 1001, 1017, 1574, 2549, 27813, 1071, 2575, 100, 1576, 2620, 1575, 2683, 3231, 102],
+      decoded: "[CLS] test $ 1 r2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] \u20b98 \u20b19 test [SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["i", "bought", "an", "apple", "for", "$", "1", ".", "00", "at", "the", "store", "."],
+      ids: [101, 1045, 4149, 2019, 6207, 2005, 1002, 1015, 1012, 4002, 2012, 1996, 3573, 1012, 102],
+      decoded: "[CLS] i bought an apple for $ 1. 00 at the store. [SEP]",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u2026"],
+      ids: [101, 2017, 1529, 102],
+      decoded: "[CLS] you \u2026 [SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u2026"],
+      ids: [101, 2017, 1529, 102],
+      decoded: "[CLS] you \u2026 [SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u2026", "you", "\u2026"],
+      ids: [101, 2017, 1529, 2017, 1529, 102],
+      decoded: "[CLS] you \u2026 you \u2026 [SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["weird", "\uff5e", "edge", "\uff5e", "case"],
+      ids: [101, 6881, 1995, 3341, 1995, 2553, 102],
+      decoded: "[CLS] weird \uff5e edge \uff5e case [SEP]",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "."],
+      ids: [101, 100, 100, 100, 100, 100, 1012, 102],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK] [UNK]. [SEP]",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [101, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 102],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [101, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 102],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["ah", "\u535a", "[UNK]", "z", "##z"],
+      ids: [101, 6289, 1786, 100, 1062, 2480, 102],
+      decoded: "[CLS] ah \u535a [UNK] zz [SEP]",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["hello"],
+      ids: [101, 7592, 102],
+      decoded: "[CLS] hello [SEP]",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["hello", "!", "how", "are", "you", "?"],
+      ids: [101, 7592, 999, 2129, 2024, 2017, 1029, 102],
+      decoded: "[CLS] hello! how are you? [SEP]",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["hall", "##o", "!", "how", "are", "you", "?"],
+      ids: [101, 2534, 2080, 999, 2129, 2024, 2017, 1029, 102],
+      decoded: "[CLS] hallo! how are you? [SEP]",
+    },
+    ONLY_WHITESPACE: {
+      text: BASE_TEST_STRINGS.ONLY_WHITESPACE,
+      tokens: [],
+      ids: [101, 102],
+      decoded: "[CLS] [SEP]",
+    },
+
+    TEXT_PAIR: {
+      text: "hello",
+      text_pair: "world",
+      tokens: ["hello", "world"],
+      ids: [101, 7592, 102, 2088, 102],
+      decoded: "[CLS] hello [SEP] world [SEP]",
+    },
+  },
+  "Xenova/bert-base-cased": {
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "o", "##b", "##j", ".", "to", "##S", "##tring", "(", ")", ";", "to", "##S", "##tring", "(", ")", ";"],
+      ids: [101, 1519, 170, 134, 184, 1830, 3361, 119, 1106, 1708, 28108, 113, 114, 132, 1106, 1708, 28108, 113, 114, 132, 102],
+      decoded: "[CLS] let a = obj. toString ( ) ; toString ( ) ; [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "##wan", "##t\u00e9", "##d", ",", "running"],
+      ids: [101, 7414, 5491, 14608, 1181, 117, 1919, 102],
+      decoded: "[CLS] UNwant\u00e9d, running [SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "[UNK]", "[UNK]", "\u771f", "[UNK]", "[UNK]"],
+      ids: [101, 1056, 100, 100, 1061, 100, 100, 102],
+      decoded: "[CLS] \u751f [UNK] [UNK] \u771f [UNK] [UNK] [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "R", "##2", "#", "3", "\u20ac", "##4", "\u00a3", "##5", "\u00a5", "##6", "[UNK]", "\u20b9", "##8", "\u20b1", "##9", "test"],
+      ids: [101, 2774, 109, 122, 155, 1477, 108, 124, 836, 1527, 202, 1571, 203, 1545, 100, 838, 1604, 837, 1580, 2774, 102],
+      decoded: "[CLS] test $ 1 R2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] \u20b98 \u20b19 test [SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "bought", "an", "apple", "for", "$", "1", ".", "00", "at", "the", "store", "."],
+      ids: [101, 146, 3306, 1126, 12075, 1111, 109, 122, 119, 3135, 1120, 1103, 2984, 119, 102],
+      decoded: "[CLS] I bought an apple for $ 1. 00 at the store. [SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["weird", "[UNK]", "edge", "[UNK]", "case"],
+      ids: [101, 6994, 100, 2652, 100, 1692, 102],
+      decoded: "[CLS] weird [UNK] edge [UNK] case [SEP]",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["ah", "[UNK]", "[UNK]", "z", "##z"],
+      ids: [101, 18257, 100, 100, 195, 1584, 102],
+      decoded: "[CLS] ah [UNK] [UNK] zz [SEP]",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["H", "##\u00e9", "##llo"],
+      ids: [101, 145, 2744, 6643, 102],
+      decoded: "[CLS] H\u00e9llo [SEP]",
+    },
+  },
+
+  "Xenova/bert-base-multilingual-cased-ner-hrl": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "are", "you", "doing", "?"],
+      ids: [101, 14962, 10301, 13028, 30918, 136, 102],
+      decoded: "[CLS] How are you doing? [SEP]",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "should", "'", "ve", "done", "this"],
+      ids: [101, 11065, 14819, 112, 10323, 20378, 10531, 102],
+      decoded: "[CLS] You should've done this [SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "company", "was", "founded", "in", "2016", "."],
+      ids: [101, 10117, 12100, 10134, 14078, 10106, 10255, 119, 102],
+      decoded: "[CLS] The company was founded in 2016. [SEP]",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "'", "ll", "!", "!", "to", "?", "'", "d", "'", "'", "d", "of", ",", "can", "'", "t", "."],
+      ids: [101, 138, 112, 22469, 106, 106, 10114, 136, 112, 172, 112, 112, 172, 10108, 117, 10944, 112, 188, 119, 102],
+      decoded: "[CLS] A'll!! to?'d'' d of, can't. [SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "ob", "##j", ".", "to", "##S", "##trin", "##g", "(", ")", ";", "to", "##S", "##trin", "##g", "(", ")", ";"],
+      ids: [101, 13595, 169, 134, 17339, 10418, 119, 10114, 10731, 109163, 10240, 113, 114, 132, 10114, 10731, 109163, 10240, 113, 114, 132, 102],
+      decoded: "[CLS] let a = obj. toString ( ) ; toString ( ) ; [SEP]",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["This", "is", "a", "test", "."],
+      ids: [101, 10747, 10124, 169, 15839, 119, 102],
+      decoded: "[CLS] This is a test. [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "##want", "##\u00e9d", ",", "running"],
+      ids: [101, 26578, 104216, 84193, 117, 18020, 102],
+      decoded: "[CLS] UNwant\u00e9d, running [SEP]",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "World"],
+      ids: [101, 31178, 10315, 102],
+      decoded: "[CLS] Hello World [SEP]",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hell", "##o", "world"],
+      ids: [101, 61694, 10133, 11356, 102],
+      decoded: "[CLS] hello world [SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "\u6d3b", "\u7684", "\u771f", "\u8c1b", "\u662f"],
+      ids: [101, 5600, 4978, 5718, 5769, 7378, 4380, 102],
+      decoded: "[CLS] \u751f \u6d3b \u7684 \u771f \u8c1b \u662f [SEP]",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trail", "##ing", "space"],
+      ids: [101, 56559, 10230, 16199, 102],
+      decoded: "[CLS] trailing space [SEP]",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "Hello"],
+      ids: [101, 20065, 31178, 102],
+      decoded: "[CLS] Hi Hello [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "R2", "#", "3", "\u20ac", "##4", "\u00a3", "##5", "\u00a5", "##6", "[UNK]", "\u20b9", "##8", "[UNK]", "test"],
+      ids: [101, 15839, 109, 122, 94000, 108, 124, 1775, 11011, 201, 11166, 202, 11211, 100, 1776, 11396, 100, 15839, 102],
+      decoded: "[CLS] test $ 1 R2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] \u20b98 [UNK] test [SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "bought", "an", "app", "##le", "for", "$", "1", ".", "00", "at", "the", "store", "."],
+      ids: [101, 146, 28870, 10151, 72894, 10284, 10142, 109, 122, 119, 11025, 10160, 10105, 13708, 119, 102],
+      decoded: "[CLS] I bought an apple for $ 1. 00 at the store. [SEP]",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "[UNK]"],
+      ids: [101, 13028, 100, 102],
+      decoded: "[CLS] you [UNK] [SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "[UNK]"],
+      ids: [101, 13028, 100, 102],
+      decoded: "[CLS] you [UNK] [SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "[UNK]", "you", "[UNK]"],
+      ids: [101, 13028, 100, 13028, 100, 102],
+      decoded: "[CLS] you [UNK] you [UNK] [SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["wei", "##rd", "\uff5e", "edge", "\uff5e", "case"],
+      ids: [101, 86981, 12023, 10096, 30599, 10096, 13474, 102],
+      decoded: "[CLS] weird \uff5e edge \uff5e case [SEP]",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["ah", "\u535a", "\u63a8", "z", "##z"],
+      ids: [101, 69863, 2684, 4163, 194, 10305, 102],
+      decoded: "[CLS] ah \u535a \u63a8 zz [SEP]",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["H", "##\u00e9l", "##lo"],
+      ids: [101, 145, 24817, 10715, 102],
+      decoded: "[CLS] H\u00e9llo [SEP]",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["He", "##LL", "##o", "!", "how", "Are", "yo", "##U", "?"],
+      ids: [101, 10357, 82834, 10133, 106, 14796, 13491, 13672, 12022, 136, 102],
+      decoded: "[CLS] HeLLo! how Are yoU? [SEP]",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["H", "##\u00e4", "##LL", "##o", "!", "how", "Are", "yo", "##U", "?"],
+      ids: [101, 145, 11013, 82834, 10133, 106, 14796, 13491, 13672, 12022, 136, 102],
+      decoded: "[CLS] H\u00e4LLo! how Are yoU? [SEP]",
+    },
+  },
+  "Xenova/paraphrase-multilingual-MiniLM-L12-v2": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["\u2581How", "\u2581are", "\u2581you", "\u2581doing", "?"],
+      ids: [0, 11249, 621, 398, 20594, 32, 2],
+      decoded: "<s> How are you doing?</s>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["\u2581You", "\u2581should", "'", "ve", "\u2581done", "\u2581this"],
+      ids: [0, 2583, 5608, 25, 272, 16940, 903, 2],
+      decoded: "<s> You should've done this</s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u2581The", "\u2581company", "\u2581was", "\u2581found", "ed", "\u2581in", "\u25812016."],
+      ids: [0, 581, 14380, 509, 14037, 297, 23, 6360, 2],
+      decoded: "<s> The company was founded in 2016.</s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581A", "\u2581'", "ll", "\u2581!!", "to", "?", "'", "d", "''", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [0, 62, 242, 1181, 6506, 188, 32, 25, 71, 4765, 71, 111, 4, 831, 25, 18, 5, 2],
+      decoded: "<s> A 'll!!to?'d''d of, can't.</s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u2581de", "f", "\u2581main", "(", "):", "\u2581pass"],
+      ids: [0, 8, 420, 5201, 132, 2077, 27875, 2],
+      decoded: "<s> def main(): pass</s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u2581let", "\u2581a", "\u2581=", "\u2581ob", "j", ".", "to", "Str", "ing", "(", ");", "\u2581to", "Str", "ing", "(", ");"],
+      ids: [0, 2633, 10, 2203, 995, 170, 5, 188, 71713, 214, 132, 3142, 47, 71713, 214, 132, 3142, 2],
+      decoded: "<s> let a = obj.toString(); toString();</s>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["\u2581This", "\u2581is", "\u2581a", "\u2581test", "."],
+      ids: [0, 3293, 83, 10, 3034, 5, 2],
+      decoded: "<s> This is a test.</s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u2581UN", "wan", "t\u00e9", "d", ",", "run", "ning"],
+      ids: [0, 8274, 3206, 2312, 71, 4, 16428, 592, 2],
+      decoded: "<s> UNwant\u00e9d,running</s>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["\u25811", "\u0000", "2", "\u25813"],
+      ids: [0, 106, 3, 304, 138, 2],
+      decoded: "<s> 1<unk>2 3</s>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["\u2581Hello", "\u2581World"],
+      ids: [0, 35378, 6661, 2],
+      decoded: "<s> Hello World</s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["\u2581hell", "o", "\u2581world"],
+      ids: [0, 33600, 31, 8999, 2],
+      decoded: "<s> hello world</s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581", "\u751f\u6d3b\u7684", "\u771f", "\u8c1b", "\u662f"],
+      ids: [0, 6, 62668, 5364, 245875, 354, 2],
+      decoded: "<s> \u751f\u6d3b\u7684\u771f\u8c1b\u662f</s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581leading", "\u2581space"],
+      ids: [0, 105207, 32628, 2],
+      decoded: "<s> leading space</s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trail", "ing", "\u2581space"],
+      ids: [0, 141037, 214, 32628, 2],
+      decoded: "<s> trailing space</s>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["\u2581Hi", "\u2581Hello"],
+      ids: [0, 2673, 35378, 2],
+      decoded: "<s> Hi Hello</s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$1", "\u2581R", "2", "\u2581#3", "\u2581\u20ac", "4", "\u2581\u00a3", "5", "\u2581", "\u00a5", "6", "\u2581", "\u20a3", "7", "\u2581\u20b9", "8", "\u2581", "\u20b1", "9", "\u2581test"],
+      ids: [0, 3034, 38629, 627, 304, 111378, 2505, 617, 11762, 758, 6, 32389, 910, 6, 3, 966, 87316, 1019, 6, 247425, 1126, 3034, 2],
+      decoded: "<s> test $1 R2 #3 \u20ac4 \u00a35 \u00a56 <unk>7 \u20b98 \u20b19 test</s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581I", "\u2581bought", "\u2581an", "\u2581apple", "\u2581for", "\u2581$", "1.00", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [0, 87, 123997, 142, 108787, 100, 3650, 146533, 99, 70, 4343, 5, 2],
+      decoded: "<s> I bought an apple for $1.00 at the store.</s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "..."],
+      ids: [0, 398, 27, 2],
+      decoded: "<s> you...</s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "..."],
+      ids: [0, 398, 27, 2],
+      decoded: "<s> you...</s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "...", "\u2581you", "..."],
+      ids: [0, 398, 27, 398, 27, 2],
+      decoded: "<s> you... you...</s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581weird", "\u2581", "\uff5e", "\u2581edge", "\u2581", "\uff5e", "\u2581case"],
+      ids: [0, 179459, 6, 6087, 121303, 6, 6087, 7225, 2],
+      decoded: "<s> weird \uff5e edge \uff5e case</s>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581This", "\u2581is", "\u2581a", "\u2581test", "\u2581", "."],
+      ids: [0, 3293, 83, 10, 3034, 6, 5, 2],
+      decoded: "<s> This is a test.</s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581", "\ud83d\ude02", "\u2581", "\ud83d\udc4d", "\u2581", "\ud83e\udd23", "\u2581", "\ud83d\ude0d", "\u2581", "\ud83d\ude2d", "\u2581", "\ud83c\udf89", "\u2581", "\ud83d\ude4f", "\u2581", "\ud83d\ude0a", "\u2581", "\ud83d\udd25", "\u2581", "\ud83d\ude01", "\u2581", "\ud83d\ude05", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\ude06", "\u2581", "\ud83d\udc4f", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc9c", "\u2581", "\ud83d\udc9a", "\u2581", "\ud83d\udc97", "\u2581", "\ud83d\udc99", "\u2581", "\ud83d\udda4", "\u2581", "\ud83d\ude0e", "\u2581", "\ud83d\udc4c", "\u2581", "\ud83e\udd73", "\u2581", "\ud83d\udcaa", "\u2581", "\u2728", "\u2581", "\ud83d\udc49", "\u2581", "\ud83d\udc40", "\u2581", "\ud83d\udcaf", "\u2581", "\ud83c\udf88", "\u2581", "\ud83d\ude48", "\u2581", "\ud83d\ude4c", "\u2581", "\ud83d\udc80", "\u2581", "\ud83d\udc47", "\u2581", "\ud83d\udc4b", "\u2581", "\u2705", "\u2581", "\ud83c\udf81", "\u2581", "\ud83c\udf1e", "\u2581", "\ud83c\udf38", "\u2581", "\ud83d\udcb0"],
+      ids: [0, 6, 115114, 6, 118280, 6, 243385, 6, 84464, 6, 232773, 6, 243816, 6, 113612, 6, 82803, 6, 222326, 6, 201344, 6, 239569, 6, 243544, 6, 191876, 6, 243404, 49933, 15755, 6, 244233, 6, 244162, 6, 244181, 6, 243892, 6, 245820, 6, 161546, 6, 204811, 6, 3, 6, 238992, 6, 167474, 6, 120242, 6, 245561, 6, 244864, 6, 246144, 6, 244459, 6, 244703, 6, 246887, 6, 144400, 6, 246511, 6, 142325, 6, 244230, 6, 245559, 6, 243374, 6, 245200, 2],
+      decoded: "<s> \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c <unk> \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0</s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "\u2728", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\udc41", "\ufe0f", "\u2581", "\ud83d\udc71", "\ud83c\udffb", "\u2581", "\ud83d\udd75", "\u2581", "\u2642", "\ufe0f", "\u2581", "\ud83e\uddd9", "\ud83c\udffb", "\u2581", "\u2642", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581", "\ud83c\udf3e", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83d\udc69", "\u2581\u2764", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc67", "\u2581", "\ud83d\udc66", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\ud83c\udffc"],
+      ids: [0, 6, 167474, 6, 243544, 6, 246984, 15755, 6, 247201, 79500, 6, 248325, 6, 228250, 15755, 6, 3, 79500, 6, 228250, 6, 244314, 79500, 6, 246529, 6, 3, 6, 247443, 6, 3, 6, 244785, 49933, 6, 244960, 6, 244314, 6, 244785, 6, 244785, 6, 245719, 6, 246167, 6, 3, 79500, 6, 247443, 6, 3, 79500, 6, 3, 6, 244314, 79500, 49933, 15755, 6, 244960, 6, 244314, 239719, 2],
+      decoded: "<s> \u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75 \u2642\ufe0f <unk>\ud83c\udffb \u2642 \ud83d\udc68\ud83c\udffb \ud83c\udf3e <unk> \ud83e\udd1d <unk> \ud83d\udc69 \u2764 \ud83d\udc8b \ud83d\udc68 \ud83d\udc69 \ud83d\udc69 \ud83d\udc67 \ud83d\udc66 <unk>\ud83c\udffb \ud83e\udd1d <unk>\ud83c\udffb <unk> \ud83d\udc68\ud83c\udffb \u2764\ufe0f \ud83d\udc8b \ud83d\udc68\ud83c\udffc</s>",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["\u2581ah", "\u535a", "\u63a8", "zz"],
+      ids: [0, 1263, 11173, 10238, 13894, 2],
+      decoded: "<s> ah\u535a\u63a8zz</s>",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["\u2581H\u00e9", "llo"],
+      ids: [0, 88064, 9284, 2],
+      decoded: "<s> H\u00e9llo</s>",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["\u2581He", "LL", "o", "!", "how", "\u2581Are", "\u2581yo", "U", "?"],
+      ids: [0, 1529, 23708, 31, 38, 47251, 15901, 3005, 1062, 32, 2],
+      decoded: "<s> HeLLo!how Are yoU?</s>",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["\u2581H\u00e4", "LL", "o", "!", "how", "\u2581Are", "\u2581yo", "U", "?"],
+      ids: [0, 28863, 23708, 31, 38, 47251, 15901, 3005, 1062, 32, 2],
+      decoded: "<s> H\u00e4LLo!how Are yoU?</s>",
+    },
+  },
+  "Xenova/bert-base-multilingual-uncased-sentiment": {
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "ob", "##j", ".", "tos", "##tri", "##ng", "(", ")", ";", "tos", "##tri", "##ng", "(", ")", ";"],
+      ids: [101, 12421, 143, 134, 15547, 10428, 119, 53564, 27711, 10422, 113, 114, 132, 53564, 27711, 10422, 113, 114, 132, 102],
+      decoded: "[CLS] let a = obj. tostring ( ) ; tostring ( ) ; [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["un", "##wan", "##ted", ",", "running"],
+      ids: [101, 10119, 15134, 11894, 117, 16484, 102],
+      decoded: "[CLS] unwanted, running [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r2", "#", "3", "\u20ac", "##4", "\u00a3", "##5", "\u00a5", "##6", "[UNK]", "\u20b9", "##8", "\u20b1", "##9", "test"],
+      ids: [101, 14084, 109, 122, 85583, 108, 124, 1329, 11124, 175, 11301, 177, 11325, 100, 1332, 11544, 1330, 11518, 14084, 102],
+      decoded: "[CLS] test $ 1 r2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] \u20b98 \u20b19 test [SEP]",
+    },
+  },
+  "Xenova/multilingual-e5-small": {
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trail", "ing", "\u2581space", "\u2581"],
+      ids: [0, 141037, 214, 32628, 6, 2],
+      decoded: "<s> trailing space </s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "...", "\u2581"],
+      ids: [0, 398, 27, 6, 2],
+      decoded: "<s> you... </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "...", "\u2581"],
+      ids: [0, 398, 27, 6, 2],
+      decoded: "<s> you... </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "...", "\u2581you", "...", "\u2581"],
+      ids: [0, 398, 27, 398, 27, 6, 2],
+      decoded: "<s> you... you... </s>",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["\u2581He", "LL", "o", "!", "how", "\u2581Are", "\u2581yo", "U", "?", "\u2581"],
+      ids: [0, 1529, 23708, 31, 38, 47251, 15901, 3005, 1062, 32, 6, 2],
+      decoded: "<s> HeLLo!how Are yoU? </s>",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["\u2581H\u00e4", "LL", "o", "!", "how", "\u2581Are", "\u2581yo", "U", "?", "\u2581"],
+      ids: [0, 28863, 23708, 31, 38, 47251, 15901, 3005, 1062, 32, 6, 2],
+      decoded: "<s> H\u00e4LLo!how Are yoU? </s>",
+    },
+  },
+  "Xenova/bge-small-zh-v1.5": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["[UNK]", "are", "you", "doi", "##ng", "?"],
+      ids: [101, 100, 8995, 8357, 9962, 8291, 136, 102],
+      decoded: "[CLS] [UNK] are you doing? [SEP]",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["[UNK]", "sh", "##ould", "'", "ve", "don", "##e", "this"],
+      ids: [101, 100, 11167, 11734, 112, 12810, 9524, 8154, 8554, 102],
+      decoded: "[CLS] [UNK] should've done this [SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["[UNK]", "company", "was", "f", "##ound", "##ed", "in", "2016", "."],
+      ids: [101, 100, 10007, 9947, 148, 11477, 8303, 8217, 8112, 119, 102],
+      decoded: "[CLS] [UNK] company was founded in 2016. [SEP]",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["[UNK]", "'", "ll", "!", "!", "to", "?", "'", "d", "'", "'", "d", "of", ",", "can", "'", "t", "."],
+      ids: [101, 100, 112, 10856, 106, 106, 8228, 136, 112, 146, 112, 112, 146, 8205, 117, 9109, 112, 162, 119, 102],
+      decoded: "[CLS] [UNK]'ll!! to?'d'' d of, can't. [SEP]",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["de", "##f", "main", "(", ")", ":", "pass"],
+      ids: [101, 8363, 8189, 9139, 113, 114, 131, 9703, 102],
+      decoded: "[CLS] def main ( ) : pass [SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "ob", "##j", ".", "[UNK]", "(", ")", ";", "[UNK]", "(", ")", ";"],
+      ids: [101, 9946, 143, 134, 12639, 8334, 119, 100, 113, 114, 132, 100, 113, 114, 132, 102],
+      decoded: "[CLS] let a = obj. [UNK] ( ) ; [UNK] ( ) ; [SEP]",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["[UNK]", "is", "a", "test", "."],
+      ids: [101, 100, 8310, 143, 10060, 119, 102],
+      decoded: "[CLS] [UNK] is a test. [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["[UNK]", ",", "running"],
+      ids: [101, 100, 117, 11620, 102],
+      decoded: "[CLS] [UNK], running [SEP]",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["[UNK]", "[UNK]"],
+      ids: [101, 100, 100, 102],
+      decoded: "[CLS] [UNK] [UNK] [SEP]",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["le", "##ad", "##ing", "space"],
+      ids: [101, 8983, 8695, 8221, 9634, 102],
+      decoded: "[CLS] leading space [SEP]",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["t", "##rail", "##ing", "space"],
+      ids: [101, 162, 12783, 8221, 9634, 102],
+      decoded: "[CLS] trailing space [SEP]",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["[UNK]", "[UNK]"],
+      ids: [101, 100, 100, 102],
+      decoded: "[CLS] [UNK] [UNK] [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "[UNK]", "#", "3", "\u20ac", "##4", "\u00a3", "##5", "\u00a5", "##6", "[UNK]", "[UNK]", "[UNK]", "test"],
+      ids: [101, 10060, 109, 122, 100, 108, 124, 359, 8159, 173, 8157, 175, 8158, 100, 100, 100, 10060, 102],
+      decoded: "[CLS] test $ 1 [UNK] # 3 \u20ac4 \u00a35 \u00a56 [UNK] [UNK] [UNK] test [SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["[UNK]", "bo", "##ugh", "##t", "an", "apple", "for", "$", "1", ".", "00", "at", "the", "store", "."],
+      ids: [101, 100, 11059, 12667, 8165, 9064, 8350, 8330, 109, 122, 119, 8136, 8243, 8174, 8719, 119, 102],
+      decoded: "[CLS] [UNK] bought an apple for $ 1. 00 at the store. [SEP]",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\ud83d\ude02", "\ud83d\udc4d", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "\ud83d\udd25", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "\ud83d\ude0e", "[UNK]", "[UNK]", "[UNK]", "\u2728", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [101, 8104, 8102, 100, 100, 100, 100, 100, 100, 8103, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 8105, 100, 100, 100, 501, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 102],
+      decoded: "[CLS] \ud83d\ude02 \ud83d\udc4d [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] \ud83d\udd25 [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] \ud83d\ude0e [UNK] [UNK] [UNK] \u2728 [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2728", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [101, 501, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 102],
+      decoded: "[CLS] \u2728 [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["[UNK]"],
+      ids: [101, 100, 102],
+      decoded: "[CLS] [UNK] [SEP]",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["[UNK]", "!", "how", "[UNK]", "[UNK]", "?"],
+      ids: [101, 100, 106, 9510, 100, 100, 136, 102],
+      decoded: "[CLS] [UNK]! how [UNK] [UNK]? [SEP]",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["[UNK]", "!", "how", "[UNK]", "[UNK]", "?"],
+      ids: [101, 100, 106, 9510, 100, 100, 136, 102],
+      decoded: "[CLS] [UNK]! how [UNK] [UNK]? [SEP]",
+    },
+  },
+  "Xenova/bge-base-zh-v1.5": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["how", "are", "you", "doi", "##ng", "?"],
+      ids: [101, 9510, 8995, 8357, 9962, 8291, 136, 102],
+      decoded: "[CLS] how are you doing? [SEP]",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["you", "sh", "##ould", "'", "ve", "don", "##e", "this"],
+      ids: [101, 8357, 11167, 11734, 112, 12810, 9524, 8154, 8554, 102],
+      decoded: "[CLS] you should've done this [SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["the", "company", "was", "f", "##ound", "##ed", "in", "2016", "."],
+      ids: [101, 8174, 10007, 9947, 148, 11477, 8303, 8217, 8112, 119, 102],
+      decoded: "[CLS] the company was founded in 2016. [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["u", "##n", "##wan", "##ted", ",", "running"],
+      ids: [101, 163, 8171, 9951, 9255, 117, 11620, 102],
+      decoded: "[CLS] unwanted, running [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r2", "#", "3", "\u20ac", "##4", "\u00a3", "##5", "\u00a5", "##6", "[UNK]", "[UNK]", "[UNK]", "test"],
+      ids: [101, 10060, 109, 122, 11345, 108, 124, 359, 8159, 173, 8157, 175, 8158, 100, 100, 100, 10060, 102],
+      decoded: "[CLS] test $ 1 r2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] [UNK] [UNK] test [SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["i", "bo", "##ugh", "##t", "an", "apple", "for", "$", "1", ".", "00", "at", "the", "store", "."],
+      ids: [101, 151, 11059, 12667, 8165, 9064, 8350, 8330, 109, 122, 119, 8136, 8243, 8174, 8719, 119, 102],
+      decoded: "[CLS] i bought an apple for $ 1. 00 at the store. [SEP]",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\ud83d\ude02", "\ud83d\udc4d", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "\ud83d\udd25", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "\u2764", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "\ud83d\ude0e", "[UNK]", "[UNK]", "[UNK]", "\u2728", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [101, 8104, 8102, 100, 100, 100, 100, 100, 100, 8103, 100, 100, 100, 100, 100, 506, 100, 100, 100, 100, 100, 8105, 100, 100, 100, 501, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 102],
+      decoded: "[CLS] \ud83d\ude02 \ud83d\udc4d [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] \ud83d\udd25 [UNK] [UNK] [UNK] [UNK] [UNK] \u2764 [UNK] [UNK] [UNK] [UNK] [UNK] \ud83d\ude0e [UNK] [UNK] [UNK] \u2728 [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+  },
+  "Xenova/indobert-base-p1": {
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["you", "sho", "##uld", "'", "ve", "don", "##e", "this"],
+      ids: [2, 3299, 9596, 15370, 30463, 28239, 4081, 30357, 5379, 3],
+      decoded: "[CLS] you should've done this [SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["the", "company", "was", "found", "##ed", "in", "2016", "."],
+      ids: [2, 1002, 9105, 2738, 11009, 133, 48, 1538, 30470, 3],
+      decoded: "[CLS] the company was founded in 2016. [SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "ob", "##j", ".", "tos", "##trin", "##g", "(", ")", ";", "tos", "##trin", "##g", "(", ")", ";"],
+      ids: [2, 4734, 253, 30475, 559, 30372, 30470, 20498, 12448, 30365, 30464, 30465, 30473, 20498, 12448, 30365, 30464, 30465, 30473, 3],
+      decoded: "[CLS] let a = obj. tostring ( ) ; tostring ( ) ; [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["un", "##wan", "##te", "##d", ",", "running"],
+      ids: [2, 78, 1322, 3298, 30364, 30468, 22715, 3],
+      decoded: "[CLS] unwanted, running [SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [2, 1, 1, 1, 1, 1, 1, 3],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["lead", "##ing", "space"],
+      ids: [2, 9196, 55, 14561, 3],
+      decoded: "[CLS] leading space [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r", "##2", "#", "3", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "test"],
+      ids: [2, 4243, 30460, 111, 56, 30378, 30459, 283, 1, 1, 1, 1, 1, 1, 4243, 3],
+      decoded: "[CLS] test $ 1 r2 # 3 [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] test [SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["i", "bo", "##ught", "an", "apple", "for", "$", "1", ".", "00", "at", "the", "store", "."],
+      ids: [2, 89, 1880, 25009, 223, 7761, 1548, 30460, 111, 30470, 4230, 117, 1002, 8052, 30470, 3],
+      decoded: "[CLS] i bought an apple for $ 1. 00 at the store. [SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["wei", "##rd", "[UNK]", "edge", "[UNK]", "case"],
+      ids: [2, 27753, 12548, 1, 21418, 1, 13687, 3],
+      decoded: "[CLS] weird [UNK] edge [UNK] case [SEP]",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["hallo", "!", "how", "are", "you", "?"],
+      ids: [2, 19598, 30457, 11088, 5811, 3299, 30477, 3],
+      decoded: "[CLS] hallo! how are you? [SEP]",
+    },
+  },
+  "Xenova/spanbert-large-cased": {
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "o", "##b", "##j", ".", "to", "##st", "##ring", "(", ")", ";", "to", "##st", "##ring", "(", ")", ";"],
+      ids: [101, 1519, 170, 134, 184, 1830, 3361, 119, 1106, 2050, 3384, 113, 114, 132, 1106, 2050, 3384, 113, 114, 132, 102],
+      decoded: "[CLS] let a = obj. tostring ( ) ; tostring ( ) ; [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r", "##2", "#", "3", "\u20ac", "##4", "\u00a3", "##5", "\u00a5", "##6", "[UNK]", "\u20b9", "##8", "\u20b1", "##9", "test"],
+      ids: [101, 2774, 109, 122, 187, 1477, 108, 124, 836, 1527, 202, 1571, 203, 1545, 100, 838, 1604, 837, 1580, 2774, 102],
+      decoded: "[CLS] test $ 1 r2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] \u20b98 \u20b19 test [SEP]",
+    },
+  },
+  "Xenova/UMLSBert_ENG": {
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "obj", ".", "tos", "##tring", "(", ")", ";", "tos", "##tring", "(", ")", ";"],
+      ids: [2, 8894, 42, 32, 2473, 17, 22660, 23640, 11, 12, 30, 22660, 23640, 11, 12, 30, 3],
+      decoded: "[CLS] let a = obj. tostring ( ) ; tostring ( ) ; [SEP]",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["hel", "##lo", "world"],
+      ids: [2, 3018, 5368, 4517, 3],
+      decoded: "[CLS] hello world [SEP]",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hel", "##lo", "world"],
+      ids: [2, 3018, 5368, 4517, 3],
+      decoded: "[CLS] hello world [SEP]",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["hi", "hel", "##lo"],
+      ids: [2, 11245, 3018, 5368, 3],
+      decoded: "[CLS] hi hello [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r2", "#", "3", "\u20ac", "##4", "\u00a3", "##5", "\u00a5", "##6", "\u20a3", "##7", "\u20b9", "##8", "\u20b1", "##9", "test"],
+      ids: [2, 2313, 7, 20, 9663, 6, 22, 528, 1017, 74, 1009, 76, 1018, 524, 1019, 531, 1011, 529, 1038, 2313, 3],
+      decoded: "[CLS] test $ 1 r2 # 3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test [SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "##ir", "##d", "\uff5e", "edge", "\uff5e", "case"],
+      ids: [2, 1802, 1753, 1022, 943, 9676, 943, 2632, 3],
+      decoded: "[CLS] weird \uff5e edge \uff5e case [SEP]",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["hel", "##lo"],
+      ids: [2, 3018, 5368, 3],
+      decoded: "[CLS] hello [SEP]",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["hel", "##lo", "!", "how", "are", "you", "?"],
+      ids: [2, 3018, 5368, 5, 2135, 1810, 17915, 34, 3],
+      decoded: "[CLS] hello! how are you? [SEP]",
+    },
+  },
+  "Xenova/SapBERT-from-PubMedBERT-fulltext": {
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "\u6d3b", "\u7684", "[UNK]", "[UNK]", "\u662f"],
+      ids: [2, 799, 776, 811, 1, 1, 731, 3],
+      decoded: "[CLS] \u751f \u6d3b \u7684 [UNK] [UNK] \u662f [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r2", "#", "3", "\u20ac", "##4", "\u00a3", "##5", "\u00a5", "##6", "[UNK]", "\u20b9", "##8", "[UNK]", "test"],
+      ids: [2, 2648, 8, 21, 7261, 7, 23, 281, 1006, 76, 1015, 78, 1016, 1, 282, 1025, 1, 2648, 3],
+      decoded: "[CLS] test $ 1 r2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] \u20b98 [UNK] test [SEP]",
+    },
+  },
+  "Xenova/rubert-base-cased": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "are", "you", "do", "##ing", "?"],
+      ids: [101, 15474, 10813, 13540, 10661, 7729, 166, 102],
+      decoded: "[CLS] How are you doing? [SEP]",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "sh", "##oul", "##d", "'", "ve", "don", "##e", "this"],
+      ids: [101, 11577, 45942, 76143, 239, 118, 10835, 17450, 241, 11043, 102],
+      decoded: "[CLS] You should've done this [SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "comp", "##any", "was", "f", "##ound", "##ed", "in", "2016", "."],
+      ids: [101, 6821, 71382, 17927, 10646, 242, 71129, 7491, 10618, 8273, 132, 102],
+      decoded: "[CLS] The company was founded in 2016. [SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "ob", "##j", ".", "to", "##St", "##ring", "(", ")", ";", "to", "##St", "##ring", "(", ")", ";"],
+      ids: [101, 14107, 232, 162, 17851, 251, 132, 10626, 21568, 13647, 120, 122, 158, 10626, 21568, 13647, 120, 122, 158, 102],
+      decoded: "[CLS] let a = obj. toString ( ) ; toString ( ) ; [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "##wan", "##t", "##\u00e9d", ",", "run", "##ning"],
+      ids: [101, 27090, 14906, 271, 84705, 128, 14607, 11781, 102],
+      decoded: "[CLS] UNwant\u00e9d, running [SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "\u6d3b", "\u7684", "\u771f", "[UNK]", "\u662f"],
+      ids: [101, 6104, 5480, 6222, 6273, 100, 4877, 102],
+      decoded: "[CLS] \u751f \u6d3b \u7684 \u771f [UNK] \u662f [SEP]",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["le", "##ading", "sp", "##ace"],
+      ids: [101, 10653, 73130, 33162, 13967, 102],
+      decoded: "[CLS] leading space [SEP]",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "##ili", "##ng", "sp", "##ace"],
+      ids: [101, 11776, 14296, 10888, 33162, 13967, 102],
+      decoded: "[CLS] trailing space [SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "bo", "##ught", "an", "app", "##le", "for", "$", "1", ".", "00", "at", "the", "st", "##ore", "."],
+      ids: [101, 186, 21018, 53718, 10663, 73406, 7159, 10654, 112, 138, 132, 11537, 10672, 10617, 28668, 13536, 132, 102],
+      decoded: "[CLS] I bought an apple for $ 1. 00 at the store. [SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "##ird", "\uff5e", "ed", "##ge", "\uff5e", "cas", "##e"],
+      ids: [101, 12463, 36865, 10608, 11051, 11037, 10608, 15501, 241, 102],
+      decoded: "[CLS] weird \uff5e edge \uff5e case [SEP]",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["a", "##h", "\u535a", "\u63a8", "z", "##z"],
+      ids: [101, 232, 247, 3166, 4657, 282, 283, 102],
+      decoded: "[CLS] ah \u535a \u63a8 zz [SEP]",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["He", "##LL", "##o", "!", "ho", "##w", "Are", "yo", "##U", "?"],
+      ids: [101, 10869, 83346, 261, 106, 13685, 277, 14003, 14184, 211, 166, 102],
+      decoded: "[CLS] HeLLo! how Are yoU? [SEP]",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["H", "##\u00e4", "##LL", "##o", "!", "ho", "##w", "Are", "yo", "##U", "?"],
+      ids: [101, 184, 384, 83346, 261, 106, 13685, 277, 14003, 14184, 211, 166, 102],
+      decoded: "[CLS] H\u00e4LLo! how Are yoU? [SEP]",
+    },
+  },
+  "Xenova/kobert": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "?"],
+      ids: [2, 0, 0, 0, 0, 258, 3],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK]? [SEP]",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["[UNK]", "[UNK]", "'", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [2, 0, 0, 15, 0, 0, 0, 3],
+      decoded: "[CLS] [UNK] [UNK]'[UNK] [UNK] [UNK] [SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "[UNK]", "[UNK]", "[UNK]", "in", "[UNK]", "."],
+      ids: [2, 355, 0, 0, 0, 409, 0, 54, 3],
+      decoded: "[CLS] The [UNK] [UNK] [UNK] in [UNK]. [SEP]",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "'", "[UNK]", "!", "!", "[UNK]", "?", "'", "d", "'", "'", "d", "[UNK]", ",", "[UNK]", "'", "t", "."],
+      ids: [2, 264, 15, 0, 5, 5, 0, 258, 15, 388, 15, 15, 388, 0, 46, 0, 15, 442, 54, 3],
+      decoded: "[CLS] A'[UNK]!! [UNK]?'d'' d [UNK], [UNK]'t. [SEP]",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["[UNK]", "[UNK]", "(", ")", ":", "[UNK]"],
+      ids: [2, 0, 0, 18, 40, 249, 0, 3],
+      decoded: "[CLS] [UNK] [UNK] ( ) : [UNK] [SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["[UNK]", "a", "=", "[UNK]", ".", "[UNK]", "(", ")", ";", "[UNK]", "(", ")", ";"],
+      ids: [2, 0, 367, 254, 0, 54, 0, 18, 40, 252, 0, 18, 40, 252, 3],
+      decoded: "[CLS] [UNK] a = [UNK]. [UNK] ( ) ; [UNK] ( ) ; [SEP]",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["[UNK]", "is", "a", "[UNK]", "."],
+      ids: [2, 0, 412, 367, 0, 54, 3],
+      decoded: "[CLS] [UNK] is a [UNK]. [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["[UNK]", ",", "[UNK]"],
+      ids: [2, 0, 46, 0, 3],
+      decoded: "[CLS] [UNK], [UNK] [SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [2, 5298, 0, 0, 0, 0, 0, 3],
+      decoded: "[CLS] \u751f [UNK] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["[UNK]", "$", "1", "[UNK]", "#", "3", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [2, 0, 10, 93, 0, 9, 142, 0, 0, 0, 0, 0, 0, 0, 3],
+      decoded: "[CLS] [UNK] $ 1 [UNK] # 3 [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "[UNK]", "an", "[UNK]", "[UNK]", "$", "1", ".", "00", "at", "[UNK]", "[UNK]", "."],
+      ids: [2, 296, 0, 374, 0, 0, 10, 93, 54, 79, 377, 0, 0, 54, 3],
+      decoded: "[CLS] I [UNK] an [UNK] [UNK] $ 1. 00 at [UNK] [UNK]. [SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [2, 0, 0, 0, 0, 3],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [2, 0, 0, 0, 0, 0, 3],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "\u2581", "."],
+      ids: [2, 0, 0, 0, 0, 517, 54, 3],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK] \u2581. [SEP]",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [2, 0, 0, 0, 0, 3],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["[UNK]", "!", "[UNK]", "[UNK]", "[UNK]", "?"],
+      ids: [2, 0, 5, 0, 0, 0, 258, 3],
+      decoded: "[CLS] [UNK]! [UNK] [UNK] [UNK]? [SEP]",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["[UNK]", "!", "[UNK]", "[UNK]", "[UNK]", "?"],
+      ids: [2, 0, 5, 0, 0, 0, 258, 3],
+      decoded: "[CLS] [UNK]! [UNK] [UNK] [UNK]? [SEP]",
+    },
+  },
+  "Xenova/scibert_scivocab_uncased": {
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "obj", ".", "to", "##string", "(", ")", ";", "to", "##string", "(", ")", ";"],
+      ids: [102, 1293, 106, 275, 2324, 205, 147, 20301, 145, 546, 1814, 147, 20301, 145, 546, 1814, 103],
+      decoded: "[CLS] let a = obj. tostring ( ) ; tostring ( ) ; [SEP]",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["hi", "hell", "##o"],
+      ids: [102, 5305, 29423, 30112, 103],
+      decoded: "[CLS] hi hello [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r", "##2", "#", "3", "\u20ac", "##4", "\u00a3", "##5", "\u00a5", "##6", "[UNK]", "[UNK]", "[UNK]", "test"],
+      ids: [102, 856, 3250, 158, 182, 30132, 3000, 239, 20801, 30140, 11221, 30139, 20704, 30142, 101, 101, 101, 856, 103],
+      decoded: "[CLS] test $ 1 r2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] [UNK] [UNK] test [SEP]",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["ah", "[UNK]", "[UNK]", "zz"],
+      ids: [102, 7839, 101, 101, 23591, 103],
+      decoded: "[CLS] ah [UNK] [UNK] zz [SEP]",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["hell", "##o"],
+      ids: [102, 29423, 30112, 103],
+      decoded: "[CLS] hello [SEP]",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["hell", "##o", "!", "how", "are", "you", "?"],
+      ids: [102, 29423, 30112, 3190, 539, 220, 3034, 3912, 103],
+      decoded: "[CLS] hello! how are you? [SEP]",
+    },
+  },
+  "Xenova/LaBSE": {
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "obj", ".", "to", "##String", "(", ")", ";", "to", "##String", "(", ")", ";"],
+      ids: [101, 17214, 170, 134, 228877, 119, 14986, 368304, 113, 114, 132, 14986, 368304, 113, 114, 132, 102],
+      decoded: "[CLS] let a = obj. toString ( ) ; toString ( ) ; [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "R2", "#", "3", "\u20ac", "##4", "\u00a35", "\u00a5", "##6", "\u20a3", "##7", "\u20b9", "##8", "\u20b1", "##9", "test"],
+      ids: [101, 17678, 109, 122, 51222, 108, 124, 3030, 16006, 279082, 205, 16151, 3023, 16187, 3037, 16175, 3033, 16236, 17678, 102],
+      decoded: "[CLS] test $ 1 R2 # 3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test [SEP]",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581", "##This", "\u2581", "##is", "\u2581", "##a", "\u2581", "##test", "\u2581", "."],
+      ids: [101, 3283, 342068, 3283, 15319, 3283, 14983, 3283, 50149, 3283, 119, 102],
+      decoded: "[CLS] \u2581This \u2581is \u2581a \u2581test \u2581. [SEP]",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\ud83d\ude02", "\ud83d\udc4d", "\ud83e\udd23", "\ud83d\ude0d", "\ud83d\ude2d", "\ud83c\udf89", "\ud83d\ude4f", "\ud83d\ude0a", "\ud83d\udd25", "\ud83d\ude01", "\ud83d\ude05", "\ud83e\udd17", "\ud83d\ude06", "\ud83d\udc4f", "\u2764\ufe0f", "\ud83d\udc9c", "\ud83d\udc9a", "\ud83d\udc97", "\ud83d\udc99", "\ud83d\udda4", "\ud83d\ude0e", "\ud83d\udc4c", "\ud83e\udd73", "\ud83d\udcaa", "\u2728", "\ud83d\udc49", "\ud83d\udc40", "\ud83d\udcaf", "\ud83c\udf88", "\ud83d\ude48", "\ud83d\ude4c", "\ud83d\udc80", "\ud83d\udc47", "\ud83d\udc4b", "\u2705", "\ud83c\udf81", "\ud83c\udf1e", "\ud83c\udf38", "\ud83d\udcb0"],
+      ids: [101, 14820, 14617, 14933, 14831, 14863, 14496, 14893, 14828, 14775, 14819, 14823, 14926, 14824, 14619, 91822, 14687, 14685, 14682, 14684, 14810, 14832, 14616, 14956, 14701, 3496, 14613, 14606, 14706, 14495, 14887, 14891, 14660, 14611, 14615, 3465, 14488, 14416, 14430, 14707, 102],
+      decoded: "[CLS] \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0 [SEP]",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2728", "\ud83e\udd17", "\ud83d\udc41\ufe0f", "\ud83d\udc71", "##\ud83c\udffb", "[UNK]", "[UNK]", "\ud83d\udc68", "##\ud83c\udffb", "##\ud83c\udf3e", "[UNK]", "\ud83d\udc69", "##\u2764", "##\ud83d\udc8b", "##\ud83d\udc68", "\ud83d\udc69", "##\ud83d\udc69", "##\ud83d\udc67", "##\ud83d\udc66", "[UNK]", "\ud83c\udff4", "\ud83d\udc68", "##\ud83c\udffb", "##\u2764", "##\ufe0f", "##\ud83d\udc8b", "##\ud83d\udc68", "##\ud83c\udffc"],
+      ids: [101, 3496, 14926, 350545, 14648, 130826, 100, 100, 14639, 130826, 498832, 100, 14640, 488649, 499065, 499034, 14640, 499035, 499033, 499032, 100, 14555, 14639, 130826, 488649, 44450, 499065, 499034, 421916, 102],
+      decoded: "[CLS] \u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb [UNK] [UNK] \ud83d\udc68\ud83c\udffb\ud83c\udf3e [UNK] \ud83d\udc69\u2764\ud83d\udc8b\ud83d\udc68 \ud83d\udc69\ud83d\udc69\ud83d\udc67\ud83d\udc66 [UNK] \ud83c\udff4 \ud83d\udc68\ud83c\udffb\u2764\ufe0f\ud83d\udc8b\ud83d\udc68\ud83c\udffc [SEP]",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["ah", "\u535a", "\u63a8", "zz"],
+      ids: [101, 15524, 4573, 6405, 441764, 102],
+      decoded: "[CLS] ah \u535a \u63a8 zz [SEP]",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["H\u00e9", "##llo"],
+      ids: [101, 220855, 23025, 102],
+      decoded: "[CLS] H\u00e9llo [SEP]",
+    },
+  },
+  "Xenova/herbert-large-cased": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["Ho", "w</w>", "are</w>", "you</w>", "do", "ing</w>", "?</w>"],
+      ids: [0, 5213, 1019, 25720, 20254, 2065, 5129, 1550, 2],
+      decoded: "<s>How are you doing? </s>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You</w>", "sho", "uld</w>", "'</w>", "ve</w>", "d", "one</w>", "this</w>"],
+      ids: [0, 32795, 14924, 48273, 1571, 6647, 72, 2290, 48846, 2],
+      decoded: "<s>You should've done this </s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The</w>", "co", "mpany</w>", "was</w>", "fo", "un", "de", "d</w>", "in</w>", "20", "16</w>", ".</w>"],
+      ids: [0, 7117, 2406, 41449, 9873, 3435, 2195, 2101, 1038, 2651, 5646, 2555, 1899, 2],
+      decoded: "<s>The company was founded in 2016. </s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A</w>", "'</w>", "ll</w>", "!</w>", "!</w>", "to</w>", "?</w>", "'</w>", "d</w>", "'</w>", "'</w>", "d</w>", "of</w>", ",</w>", "can</w>", "'</w>", "t</w>", ".</w>"],
+      ids: [0, 1012, 1571, 9396, 1725, 1725, 2063, 1550, 1571, 1038, 1571, 1571, 1038, 6595, 1947, 26794, 1571, 1026, 1899, 2],
+      decoded: "<s>A'll!! to?'d'' d of, can't. </s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["de", "f</w>", "main</w>", "(</w>", ")</w>", ":</w>", "pa", "ss</w>"],
+      ids: [0, 2101, 1050, 41851, 1341, 1940, 1335, 2083, 5357, 2],
+      decoded: "<s>def main ( ) : pass </s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let</w>", "a</w>", "=</w>", "ob", "j</w>", ".</w>", "to", "S", "tr", "ing</w>", "(</w>", ")</w>", ";</w>", "to", "S", "tr", "ing</w>", "(</w>", ")</w>", ";</w>"],
+      ids: [0, 11324, 1011, 1789, 2033, 1013, 1899, 2146, 55, 2518, 5129, 1341, 1940, 1195, 2146, 55, 2518, 5129, 1341, 1940, 1195, 2],
+      decoded: "<s>let a = obj. toString ( ) ; toString ( ) ; </s>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["T", "his</w>", "is</w>", "a</w>", "test</w>", ".</w>"],
+      ids: [0, 56, 22855, 6869, 1011, 14825, 1899, 2],
+      decoded: "<s>This is a test. </s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "wan", "t", "\u00e9", "d</w>", ",</w>", "run", "ning</w>"],
+      ids: [0, 23029, 2688, 88, 163, 1038, 1947, 4980, 17843, 2],
+      decoded: "<s>UNwant\u00e9d, running </s>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["123</w>"],
+      ids: [0, 19049, 2],
+      decoded: "<s>123 </s>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hel", "lo</w>", "World</w>"],
+      ids: [0, 12156, 6170, 21207, 2],
+      decoded: "<s>Hello World </s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hel", "lo</w>", "world</w>"],
+      ids: [0, 11526, 6170, 38188, 2],
+      decoded: "<s>hello world </s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["<unk>", "<unk>", "<unk>", "<unk>", "<unk>", "\u662f</w>"],
+      ids: [0, 3, 3, 3, 3, 3, 1776, 2],
+      decoded: "<s><unk><unk><unk><unk><unk>\u662f </s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["le", "ad", "ing</w>", "space</w>"],
+      ids: [0, 2018, 2035, 5129, 46489, 2],
+      decoded: "<s>leading space </s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "i", "ling</w>", "space</w>"],
+      ids: [0, 2201, 77, 16342, 46489, 2],
+      decoded: "<s>trailing space </s>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["H", "i</w>", "Hel", "lo</w>"],
+      ids: [0, 44, 1009, 12156, 6170, 2],
+      decoded: "<s>Hi Hello </s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test</w>", "$</w>", "1</w>", "R", "2</w>", "#</w>", "3</w>", "\u20ac", "4</w>", "\u00a3", "5</w>", "<unk>", "6</w>", "<unk>", "7</w>", "<unk>", "8</w>", "<unk>", "9</w>", "test</w>"],
+      ids: [0, 14825, 1927, 1029, 54, 1025, 1393, 1034, 706, 1018, 100, 1008, 3, 1036, 3, 1030, 3, 1064, 3, 1017, 14825, 2],
+      decoded: "<s>test $ 1 R2 # 3 \u20ac4 \u00a35 <unk>6 <unk>7 <unk>8 <unk>9 test </s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I</w>", "bou", "ght</w>", "an</w>", "ap", "ple</w>", "for</w>", "$</w>", "1</w>", ".</w>", "00</w>", "at</w>", "the</w>", "st", "ore</w>", ".</w>"],
+      ids: [0, 1056, 13016, 15272, 2879, 10309, 20861, 15181, 1927, 1029, 1899, 2291, 4772, 6854, 1989, 24005, 1899, 2],
+      decoded: "<s>I bought an apple for $ 1. 00 at the store. </s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you</w>", "\u2026</w>"],
+      ids: [0, 20254, 1826, 2],
+      decoded: "<s>you \u2026 </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you</w>", "\u2026</w>"],
+      ids: [0, 20254, 1826, 2],
+      decoded: "<s>you \u2026 </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you</w>", "\u2026</w>", "you</w>", "\u2026</w>"],
+      ids: [0, 20254, 1826, 20254, 1826, 2],
+      decoded: "<s>you \u2026 you \u2026 </s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ir", "d</w>", "<unk>", "e", "dge</w>", "<unk>", "ca", "se</w>"],
+      ids: [0, 2149, 17435, 1038, 3, 73, 25801, 3, 3833, 4417, 2],
+      decoded: "<s>weird <unk>edge <unk>case </s>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["<unk>", "T", "his</w>", "<unk>", "is</w>", "<unk>", "a</w>", "<unk>", "test</w>", "<unk>", ".</w>"],
+      ids: [0, 3, 56, 22855, 3, 6869, 3, 1011, 3, 14825, 3, 1899, 2],
+      decoded: "<s><unk>This <unk>is <unk>a <unk>test <unk>. </s>",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["a", "h</w>", "<unk>", "<unk>", "zz</w>"],
+      ids: [0, 69, 1021, 3, 3, 49185, 2],
+      decoded: "<s>ah <unk><unk>zz </s>",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["H", "\u00e9", "l", "lo</w>"],
+      ids: [0, 44, 163, 80, 6170, 2],
+      decoded: "<s>H\u00e9llo </s>",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["He", "L", "L", "o</w>", "!</w>", "ho", "w</w>", "Ar", "e</w>", "yo", "U</w>", "?</w>"],
+      ids: [0, 4596, 48, 48, 1007, 1725, 3145, 1019, 2921, 1015, 13908, 1041, 1550, 2],
+      decoded: "<s>HeLLo! how Are yoU? </s>",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["H", "\u00e4", "L", "L", "o</w>", "!</w>", "ho", "w</w>", "Ar", "e</w>", "yo", "U</w>", "?</w>"],
+      ids: [0, 44, 158, 48, 48, 1007, 1725, 3145, 1019, 2921, 1015, 13908, 1041, 1550, 2],
+      decoded: "<s>H\u00e4LLo! how Are yoU? </s>",
+    },
+  },
+  "Xenova/ernie-gram-zh": {
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r2", "#", "3", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "test"],
+      ids: [1, 6943, 18005, 208, 6847, 9474, 284, 18017, 18017, 18017, 18017, 18017, 18017, 6943, 2],
+      decoded: "[CLS] test $ 1 r2 # 3 [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] test [SEP]",
+    },
+  },
+};
diff --git a/tests/models/blenderbot_small/tokenization.js b/tests/models/blenderbot_small/tokenization.js
new file mode 100644
index 000000000..6bf4bbb93
--- /dev/null
+++ b/tests/models/blenderbot_small/tokenization.js
@@ -0,0 +1,166 @@
+import { BlenderbotSmallTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, BLENDERBOT_SMALL_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = BlenderbotSmallTokenizer;
+
+// NOTE: `.tokenize()` is disabled for BlenderbotSmallTokenizer
+export const TEST_CONFIG = {
+  "Xenova/blenderbot_small-90M": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      // "tokens": ["how", "are", "you", "doing", "?"],
+      ids: [102, 46, 15, 267, 20],
+      decoded: "how are you doing?",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      // "tokens": ["you", "should", "'", "ve", "done", "this"],
+      ids: [15, 197, 8, 117, 369, 36],
+      decoded: "you should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      // "tokens": ["0@@", "1@@", "2@@", "3@@", "4@@", "5@@", "6@@", "7@@", "89", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "100", "1000"],
+      ids: [1988, 2388, 735, 801, 827, 948, 981, 1110, 4814, 520, 143, 176, 216, 260, 253, 345, 374, 420, 475, 316, 773, 6217],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      // "tokens": ["the", "company", "was", "founded", "in", "2016", "."],
+      ids: [7, 293, 18, 912, 13, 845, 5],
+      decoded: "the company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      // "tokens": ["a", "__newln__", "'", "ll", "!", "!@@", "to", "?", "'", "d", "'", "'", "d", "of", ",", "can", "'", "t", "."],
+      ids: [12, 4, 8, 97, 37, 3, 11, 20, 8, 85, 8, 8, 85, 10, 6, 62, 8, 30, 5],
+      decoded: "a __newln__'ll! __unk__ to?'d'' d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      // "tokens": ["def", "main", "(", ")@@", ":", "__newln__", "pass"],
+      ids: [21996, 550, 40, 3, 106, 4, 1314],
+      decoded: "def main ( __unk__ : __newln__ pass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      // "tokens": ["let", "a", "=", "ob@@", "j", ".@@", "to@@", "string", "(", ")@@", ";", "__newln__", "to@@", "string", "(", ")@@", ";"],
+      ids: [131, 12, 1381, 2808, 755, 3, 752, 4529, 40, 3, 118, 4, 752, 4529, 40, 3, 118],
+      decoded: "let a = obj __unk__ tostring ( __unk__ ; __newln__ tostring ( __unk__ ;",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      // "tokens": ["this", "__newln__", "is", "__newln__", "a", "__newln__", "test", "."],
+      ids: [36, 4, 24, 4, 12, 4, 1248, 5],
+      decoded: "this __newln__ is __newln__ a __newln__ test.",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      // "tokens": ["un@@", "wan@@", "t@@", "\u00e9@@", "d", ",@@", "running"],
+      ids: [204, 4151, 291, 1677, 85, 3, 785],
+      decoded: "unwant\u00e9d __unk__ running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      // "tokens": ["1@@", "\u0000@@", "2@@", "\ufffd@@", "3"],
+      ids: [2388, 3, 735, 3, 216],
+      decoded: "1__unk__ 2__unk__ 3",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      // "tokens": ["hello", "world"],
+      ids: [880, 159],
+      decoded: "hello world",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      // "tokens": ["hello", "world"],
+      ids: [880, 159],
+      decoded: "hello world",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      // "tokens": ["\u751f@@", "\u6d3b@@", "\u7684@@", "\u771f@@", "\u8c1b@@", "\u662f"],
+      ids: [30488, 32756, 29891, 30813, 3, 34037],
+      decoded: "\u751f\u6d3b\u7684\u771f__unk__ \u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      // "tokens": ["leading", "space"],
+      ids: [1164, 833],
+      decoded: "leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      // "tokens": ["trailing", "space"],
+      ids: [12499, 833],
+      decoded: "trailing space",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      // "tokens": ["hi", "hello"],
+      ids: [792, 880],
+      decoded: "hi hello",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      // "tokens": ["test", "$@@", "1", "r@@", "2", "#@@", "3", "\u20ac@@", "4", "\u00a3@@", "5", "\u00a5@@", "6", "\u20a3@@", "7", "\u20b9@@", "8", "\u20b1@@", "9", "test"],
+      ids: [1248, 3, 143, 510, 176, 3, 216, 3, 260, 3, 253, 3, 345, 3, 374, 3, 420, 3, 475, 1248],
+      decoded: "test __unk__ 1 r2 __unk__ 3 __unk__ 4 __unk__ 5 __unk__ 6 __unk__ 7 __unk__ 8 __unk__ 9 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      // "tokens": ["i", "bought", "an", "apple", "for", "$@@", "1", ".@@", "00", "at", "the", "store", "."],
+      ids: [14, 1890, 50, 4758, 26, 3, 143, 3, 1966, 32, 7, 1640, 5],
+      decoded: "i bought an apple for __unk__ 1 __unk__ 00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      // "tokens": ["you@@", "\u2026"],
+      ids: [7984, 1244],
+      decoded: "you\u2026",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      // "tokens": ["you@@", "\u2026"],
+      ids: [7984, 1244],
+      decoded: "you\u2026",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      // "tokens": ["you@@", "\u2026", "you@@", "\u2026"],
+      ids: [7984, 1244, 7984, 1244],
+      decoded: "you\u2026 you\u2026",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      // "tokens": ["weird", "\uff5e", "edge", "\uff5e", "case"],
+      ids: [2614, 30831, 1649, 30831, 543],
+      decoded: "weird \uff5e edge \uff5e case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      // "tokens": ["\u2581@@", "this", "\u2581@@", "is", "\u2581@@", "a", "\u2581@@", "test", "\u2581", "."],
+      ids: [3, 36, 3, 24, 3, 12, 3, 1248, 50106, 5],
+      decoded: "__unk__ this __unk__ is __unk__ a __unk__ test \u2581.",
+    },
+    SPECIAL_TOKENS: {
+      text: BLENDERBOT_SMALL_TEST_STRINGS.SPECIAL_TOKENS,
+      // "tokens": ["__start__", "hello", "world", "__end__"],
+      ids: [1, 880, 159, 2],
+      decoded: "__start__ hello world __end__",
+    },
+    WHITESPACE_1: {
+      text: BLENDERBOT_SMALL_TEST_STRINGS.WHITESPACE_1,
+      // "tokens": ["__start__", "hey", "__end__"],
+      ids: [1, 226, 2],
+      decoded: "__start__ hey __end__",
+    },
+    WHITESPACE_2: {
+      text: BLENDERBOT_SMALL_TEST_STRINGS.WHITESPACE_2,
+      // "tokens": ["__start__", "hey", "__end__"],
+      ids: [1, 226, 2],
+      decoded: "__start__ hey __end__",
+    },
+  },
+};
diff --git a/tests/models/bloom/tokenization.js b/tests/models/bloom/tokenization.js
new file mode 100644
index 000000000..03b95d63a
--- /dev/null
+++ b/tests/models/bloom/tokenization.js
@@ -0,0 +1,194 @@
+import { BloomTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, BLOOM_TEST_STRINGS, SENTENCEPIECE_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = BloomTokenizer;
+export const TEST_CONFIG = {
+  "Xenova/bloom-560m": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u0120are", "\u0120you", "\u0120doing", "?"],
+      ids: [7572, 1306, 1152, 12491, 34],
+      decoded: "How are you doing?",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120should", "'ve", "\u0120done", "\u0120this"],
+      ids: [5448, 3403, 7300, 11541, 1119],
+      decoded: "You should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0123", "456789", "\u01200", "\u01201", "\u01202", "\u01203", "\u01204", "\u01205", "\u01206", "\u01207", "\u01208", "\u01209", "\u012010", "\u0120100", "\u01201000"],
+      ids: [166660, 145647, 931, 404, 415, 735, 934, 973, 1231, 1392, 1445, 1575, 1581, 4334, 19526],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u01202016", "."],
+      ids: [2175, 16333, 1620, 88289, 361, 5854, 17],
+      decoded: "The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!!", "to", "?", "'d", "''", "d", "\u0120of", ",", "\u0120can't", "."],
+      ids: [36, 189, 8722, 49825, 1025, 34, 10628, 2328, 71, 461, 15, 11229, 17],
+      decoded: "A\n'll !!to?'d''d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "()", ":", "\u010a\u0109", "pass"],
+      ids: [7564, 4291, 883, 29, 1582, 12608],
+      decoded: "def main():\n\tpass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".", "toString", "()", ";", "\u010a", "toString", "()", ";"],
+      ids: [2963, 267, 564, 17949, 17, 27392, 883, 30, 189, 27392, 883, 30],
+      decoded: "let a = obj.toString();\ntoString();",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["This", "\u010a\u010a", "is", "\u010a", "a", "\u010a", "test", "."],
+      ids: [6168, 603, 290, 189, 68, 189, 9234, 17],
+      decoded: "This\n\nis\na\ntest.",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9d", ",", "running"],
+      ids: [5777, 75642, 2454, 15, 101897],
+      decoded: "UNwant\u00e9d,running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0100", "2", "\u00ef\u00bf\u00bd", "3"],
+      ids: [20, 179, 21, 23181, 22],
+      decoded: "1\u00002\ufffd3",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "\u0120World"],
+      ids: [59414, 12155],
+      decoded: "Hello World",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "\u0120world"],
+      ids: [101579, 8876],
+      decoded: "hello world",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141\u00e6\u00b4\u00bb\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [71167, 4137, 1927, 239, 644],
+      decoded: "\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120\u0120", "\u0120leading", "\u0120space"],
+      ids: [250, 36128, 12978],
+      decoded: "   leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "iling", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [1900, 17022, 12978, 416],
+      decoded: "trailing space   ",
+    },
+    SURROUNDING_SPACE: {
+      text: BASE_TEST_STRINGS.SURROUNDING_SPACE,
+      tokens: ["\u0120\u0120", "\u0120surrounding", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [250, 66599, 12978, 416],
+      decoded: "   surrounding space   ",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u0120", "\u0120Hello"],
+      ids: [30050, 210, 86153],
+      decoded: "Hi  Hello",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$1", "\u0120R2", "\u0120#3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2\u00a5", "6", "\u0120\u00e2\u0124", "\u00a3", "7", "\u0120\u00e2\u0124\u00b9", "8", "\u0120\u00e2\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [9234, 41448, 80774, 201642, 20117, 23, 40300, 24, 62153, 25, 72279, 100, 26, 120434, 27, 72279, 113, 28, 4006],
+      decoded: "test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$1", ".", "00", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [44, 87926, 660, 101091, 613, 41448, 17, 462, 919, 368, 18706, 17],
+      decoded: "I bought an apple for $1.00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120\u0120"],
+      ids: [23438, 4346, 250],
+      decoded: "you\u2026  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [23438, 4346, 12361],
+      decoded: "you\u2026\u00a0\u00a0",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [23438, 4346, 12361, 23438, 4346, 12361],
+      decoded: "you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120\u00ef\u00bd", "\u0140", "\u0120edge", "\u0120\u00ef\u00bd", "\u0140", "\u0120case"],
+      ids: [2136, 7589, 122354, 242, 29655, 122354, 242, 4462],
+      decoded: "weird \uff5e edge \uff5e case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120\u00e2\u0138", "\u0123", "is", "\u0120\u00e2\u0138", "\u0123", "a", "\u0120\u00e2\u0138", "\u0123", "test", "\u0120\u00e2\u0138", "\u0123", "."],
+      ids: [26127, 213, 6168, 15299, 213, 290, 15299, 213, 68, 15299, 213, 9234, 15299, 213, 17],
+      decoded: "\u2581This \u2581is \u2581a \u2581test \u2581.",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u00f0\u0141\u013a", "\u0124", "\u0120\u00f0\u0141", "\u0133", "\u012f", "\u0120\u00f0\u0141", "\u00a4", "\u00a3", "\u0120\u00f0\u0141\u013a", "\u012f", "\u0120\u00f0\u0141\u013a", "\u0143", "\u0120\u00f0\u0141", "\u0130", "\u012b", "\u0120\u00f0\u0141", "\u013b", "\u0131", "\u0120\u00f0\u0141\u013a", "\u012c", "\u0120\u00f0\u0141", "\u0136", "\u00a5", "\u0120\u00f0\u0141\u013a", "\u0123", "\u0120\u00f0\u0141\u013a", "\u0127", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u013a", "\u0128", "\u0120\u00f0\u0141", "\u0133", "\u0131", "\u0120\u00e2\u013f", "\u00a4", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141", "\u0134", "\u013e", "\u0120\u00f0\u0141", "\u0134", "\u013c", "\u0120\u00f0\u0141", "\u0134", "\u0139", "\u0120\u00f0\u0141", "\u0134", "\u013b", "\u0120\u00f0\u0141", "\u0138", "\u00a4", "\u0120\u00f0\u0141\u013a", "\u0130", "\u0120\u00f0\u0141", "\u0133", "\u012e", "\u0120\u00f0\u0141", "\u00a5", "\u00b3", "\u0120\u00f0\u0141", "\u0134", "\u00aa", "\u0120\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141", "\u0133", "\u012b", "\u0120\u00f0\u0141", "\u0133", "\u0122", "\u0120\u00f0\u0141", "\u0134", "\u00af", "\u0120\u00f0\u0141", "\u0130", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012e", "\u0120\u00f0\u0141", "\u0134", "\u0122", "\u0120\u00f0\u0141", "\u0133", "\u0129", "\u0120\u00f0\u0141", "\u0133", "\u012d", "\u0120\u00e2\u013e", "\u0127", "\u0120\u00f0\u0141", "\u0130", "\u0123", "\u0120\u00f0\u0141", "\u012e", "\u0140", "\u0120\u00f0\u0141", "\u012e", "\u00b8", "\u0120\u00f0\u0141", "\u0134", "\u00b0"],
+      ids: [127322, 214, 41234, 229, 225, 41234, 101, 100, 126342, 225, 126342, 245, 41234, 226, 221, 41234, 237, 227, 126342, 222, 41234, 232, 102, 126342, 213, 126342, 217, 41234, 101, 235, 126342, 218, 41234, 229, 227, 189367, 101, 116057, 41234, 230, 240, 41234, 230, 238, 41234, 230, 235, 41234, 230, 237, 41234, 234, 101, 126342, 226, 41234, 229, 224, 41234, 102, 115, 41234, 230, 107, 76758, 105, 41234, 229, 221, 41234, 229, 212, 41234, 230, 111, 41234, 226, 220, 41234, 237, 220, 41234, 237, 224, 41234, 230, 212, 41234, 229, 219, 41234, 229, 223, 76758, 217, 41234, 226, 213, 41234, 224, 242, 41234, 224, 120, 41234, 230, 112],
+      decoded: "\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141", "\u0133", "\u0123", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141", "\u0133", "\u00b1", "\u00f0\u0141\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0137", "\u00b5", "\u00e2\u0122\u012f", "\u00e2\u013b", "\u0124", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141", "\u00a7", "\u013b", "\u00f0\u0141\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00e2\u013b", "\u0124", "\u0120\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u012e", "\u00be", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u0120\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00e2\u013f", "\u00a4", "\u00e2\u0122\u012f", "\u00f0\u0141\u0134", "\u012d", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u0120\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a7", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a6", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0131", "\u00b4", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00a2", "\u00f3", "\u0142", "\u0123", "\u00a5", "\u00f3", "\u0142", "\u0123", "\u00ae", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00bf", "\u0120\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00e2\u013f", "\u00a4", "\u00ef\u00b8\u0131", "\u00e2\u0122\u012f", "\u00f0\u0141\u0134", "\u012d", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141\u0131", "\u00bc"],
+      ids: [120709, 105, 41234, 101, 235, 41234, 229, 213, 116057, 41234, 229, 113, 244635, 123, 41234, 233, 117, 1553, 15596, 214, 116057, 41234, 104, 237, 244635, 123, 1553, 15596, 214, 41234, 229, 105, 244635, 123, 1553, 22618, 224, 126, 41234, 104, 229, 1553, 22618, 101, 241, 1553, 22618, 104, 229, 41234, 229, 106, 1553, 157147, 101, 1553, 139500, 223, 1553, 22618, 229, 105, 41234, 229, 106, 1553, 22618, 229, 106, 1553, 22618, 229, 104, 1553, 22618, 229, 103, 41234, 104, 229, 244635, 123, 1553, 22618, 101, 241, 1553, 22618, 104, 229, 244635, 123, 41234, 227, 116, 177, 244, 213, 104, 177, 244, 213, 99, 177, 244, 213, 102, 177, 244, 213, 110, 177, 244, 213, 104, 177, 244, 213, 127, 41234, 229, 105, 244635, 123, 1553, 157147, 101, 116057, 1553, 139500, 223, 1553, 22618, 229, 105, 244635, 124],
+      decoded: "\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc",
+    },
+    ONLY_WHITESPACE: {
+      text: BASE_TEST_STRINGS.ONLY_WHITESPACE,
+      tokens: ["\u0120\u0109", "\u010a"],
+      ids: [33651, 189],
+      decoded: " \t\n",
+    },
+    END_OF_SENTENCE_PUNCTUATION: {
+      text: BLOOM_TEST_STRINGS.END_OF_SENTENCE_PUNCTUATION,
+      tokens: ["test", ".", "\u0120test", ",", "\u0120test", "!", "\u0120test", "?", "\u0120test", "\u00e2\u0122\u00a6", "\u0120test", "\u00e3\u0122\u0124", "\u0120test", "\u00ef\u00bc\u012e", "\u0120test", "\u00e3\u0122\u0123", "\u0120test", "\u00e0\u00a5\u00a4", "\u0120test", "\u00db\u0136", "\u0120test", "\u00d8\u012e", "\u0120test"],
+      ids: [9234, 17, 4006, 15, 4006, 4, 4006, 34, 4006, 4346, 4006, 420, 4006, 355, 4006, 594, 4006, 527, 4006, 1174, 4006, 687, 4006],
+      decoded: "test. test, test! test? test\u2026 test\u3002 test\uff0c test\u3001 test\u0964 test\u06d4 test\u060c test",
+    },
+    SPECIAL_WITH_TRAILING_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_WITH_TRAILING_WHITESPACE,
+      tokens: ["<s>", "\u010a"],
+      ids: [1, 189],
+      decoded: "<s>\n",
+    },
+    SPECIAL_SURROUNDED_BY_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_SURROUNDED_BY_WHITESPACE,
+      tokens: ["\u0120", "</s>", "\u0120test", "\u0120", "</s>", "\u0120"],
+      ids: [210, 2, 4006, 210, 2, 210],
+      decoded: " </s> test </s> ",
+    },
+    SPECIAL_NO_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_NO_WHITESPACE,
+      tokens: ["</s>", "test", "</s>"],
+      ids: [2, 9234, 2],
+      decoded: "</s>test</s>",
+    },
+  },
+};
diff --git a/tests/models/clip/tokenization.js b/tests/models/clip/tokenization.js
new file mode 100644
index 000000000..73cacda3c
--- /dev/null
+++ b/tests/models/clip/tokenization.js
@@ -0,0 +1,166 @@
+import { CLIPTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = CLIPTokenizer;
+export const TEST_CONFIG = {
+  "Xenova/clip-vit-base-patch16": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["how</w>", "are</w>", "you</w>", "doing</w>", "?</w>"],
+      ids: [49406, 829, 631, 592, 1960, 286, 49407],
+      decoded: "<|startoftext|>how are you doing? <|endoftext|>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["you</w>", "should</w>", "'ve</w>", "done</w>", "this</w>"],
+      ids: [49406, 592, 1535, 1200, 1700, 589, 49407],
+      decoded: "<|startoftext|>you should've done this <|endoftext|>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0</w>", "1</w>", "2</w>", "3</w>", "4</w>", "5</w>", "6</w>", "7</w>", "8</w>", "9</w>", "0</w>", "1</w>", "2</w>", "3</w>", "4</w>", "5</w>", "6</w>", "7</w>", "8</w>", "9</w>", "1</w>", "0</w>", "1</w>", "0</w>", "0</w>", "1</w>", "0</w>", "0</w>", "0</w>"],
+      ids: [49406, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 272, 271, 272, 271, 271, 272, 271, 271, 271, 49407],
+      decoded: "<|startoftext|>0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 1 0 1 0 0 1 0 0 0 <|endoftext|>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["the</w>", "company</w>", "was</w>", "founded</w>", "in</w>", "2</w>", "0</w>", "1</w>", "6</w>", ".</w>"],
+      ids: [49406, 518, 2634, 739, 12240, 530, 273, 271, 272, 277, 269, 49407],
+      decoded: "<|startoftext|>the company was founded in 2 0 1 6. <|endoftext|>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["a</w>", "'ll</w>", "!!</w>", "to</w>", "?'</w>", "d</w>", "''</w>", "d</w>", "of</w>", ",</w>", "can</w>", "'t</w>", ".</w>"],
+      ids: [49406, 320, 1342, 748, 531, 13610, 323, 8445, 323, 539, 267, 753, 713, 269, 49407],
+      decoded: "<|startoftext|>a 'll!! to?' d '' d of, can 't. <|endoftext|>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def</w>", "main</w>", "(", "):</w>", "pass</w>"],
+      ids: [49406, 11649, 2623, 7, 4143, 3511, 49407],
+      decoded: "<|startoftext|>def main (): pass <|endoftext|>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let</w>", "a</w>", "=</w>", "ob", "j</w>", ".</w>", "to", "string</w>", "(", ");</w>", "to", "string</w>", "(", ");</w>"],
+      ids: [49406, 1094, 320, 284, 1411, 329, 269, 580, 9696, 7, 19686, 580, 9696, 7, 19686, 49407],
+      decoded: "<|startoftext|>let a = obj. tostring (); tostring (); <|endoftext|>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["this</w>", "is</w>", "a</w>", "test</w>", ".</w>"],
+      ids: [49406, 589, 533, 320, 1628, 269, 49407],
+      decoded: "<|startoftext|>this is a test. <|endoftext|>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["un", "want", "\u00c3\u00a9", "d</w>", ",</w>", "running</w>"],
+      ids: [49406, 569, 18356, 3459, 323, 267, 2761, 49407],
+      decoded: "<|startoftext|>unwant\u00e9d, running <|endoftext|>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1</w>", "\u0100</w>", "2</w>", "\u00ef\u00bf\u00bd</w>", "3</w>"],
+      ids: [49406, 272, 444, 273, 39802, 274, 49407],
+      decoded: "<|startoftext|>1 \u0000 2 \ufffd 3 <|endoftext|>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["hello</w>", "world</w>"],
+      ids: [49406, 3306, 1002, 49407],
+      decoded: "<|startoftext|>hello world <|endoftext|>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello</w>", "world</w>"],
+      ids: [49406, 3306, 1002, 49407],
+      decoded: "<|startoftext|>hello world <|endoftext|>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141", "\u00e6", "\u00b4", "\u00bb", "\u00e7", "\u013c", "\u0126", "\u00e7\u013e\u0141", "\u00e8", "\u00b0", "\u013d", "\u00e6\u013a", "\u00af</w>"],
+      ids: [49406, 33375, 162, 112, 119, 163, 248, 226, 41570, 164, 108, 249, 42891, 363, 49407],
+      decoded: "<|startoftext|>\u751f\u6d3b\u7684\u771f\u8c1b\u662f <|endoftext|>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["leading</w>", "space</w>"],
+      ids: [49406, 3833, 2138, 49407],
+      decoded: "<|startoftext|>leading space <|endoftext|>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trailing</w>", "space</w>"],
+      ids: [49406, 37427, 2138, 49407],
+      decoded: "<|startoftext|>trailing space <|endoftext|>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["hi</w>", "hello</w>"],
+      ids: [49406, 1883, 3306, 49407],
+      decoded: "<|startoftext|>hi hello <|endoftext|>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test</w>", "$</w>", "1</w>", "r</w>", "2</w>", "#</w>", "3</w>", "\u00e2\u0124\u00ac</w>", "4</w>", "\u00c2\u00a3</w>", "5</w>", "\u00c2\u00a5</w>", "6</w>", "\u00e2\u0124", "\u00a3</w>", "7</w>", "\u00e2\u0124\u00b9</w>", "8</w>", "\u00e2\u0124", "\u00b1</w>", "9</w>", "test</w>"],
+      ids: [49406, 1628, 259, 272, 337, 273, 258, 274, 6309, 275, 1950, 276, 20199, 277, 5227, 352, 278, 21777, 279, 5227, 365, 280, 1628, 49407],
+      decoded: "<|startoftext|>test $ 1 r 2 # 3 \u20ac 4 \u00a3 5 \u00a5 6 \u20a3 7 \u20b9 8 \u20b1 9 test <|endoftext|>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["i</w>", "bought</w>", "an</w>", "apple</w>", "for</w>", "$</w>", "1</w>", ".</w>", "0</w>", "0</w>", "at</w>", "the</w>", "store</w>", ".</w>"],
+      ids: [49406, 328, 4142, 550, 3055, 556, 259, 272, 269, 271, 271, 536, 518, 2183, 269, 49407],
+      decoded: "<|startoftext|>i bought an apple for $ 1. 0 0 at the store. <|endoftext|>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you</w>", "\u00e2\u0122\u00a6</w>"],
+      ids: [49406, 592, 959, 49407],
+      decoded: "<|startoftext|>you \u2026 <|endoftext|>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you</w>", "\u00e2\u0122\u00a6</w>"],
+      ids: [49406, 592, 959, 49407],
+      decoded: "<|startoftext|>you \u2026 <|endoftext|>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you</w>", "\u00e2\u0122\u00a6</w>", "you</w>", "\u00e2\u0122\u00a6</w>"],
+      ids: [49406, 592, 959, 592, 959, 49407],
+      decoded: "<|startoftext|>you \u2026 you \u2026 <|endoftext|>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["weird</w>", "\u00ef", "\u00bd", "\u0140</w>", "edge</w>", "\u00ef", "\u00bd", "\u0140</w>", "case</w>"],
+      ids: [49406, 5613, 171, 121, 508, 5461, 171, 121, 508, 2068, 49407],
+      decoded: "<|startoftext|>weird \uff5e edge \uff5e case <|endoftext|>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123</w>", "this</w>", "\u00e2\u0138", "\u0123</w>", "is</w>", "\u00e2\u0138", "\u0123</w>", "a</w>", "\u00e2\u0138", "\u0123</w>", "test</w>", "\u00e2\u0138", "\u0123", ".</w>"],
+      ids: [49406, 4168, 479, 589, 4168, 479, 533, 4168, 479, 320, 4168, 479, 1628, 4168, 223, 269, 49407],
+      decoded: "<|startoftext|>\u2581 this \u2581 is \u2581 a \u2581 test \u2581. <|endoftext|>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u00f0\u0141\u013a\u0124</w>", "\u00f0\u0141\u0133\u012f</w>", "\u00f0\u0141\u00a4\u00a3</w>", "\u00f0\u0141\u013a\u012f</w>", "\u00f0\u0141\u013a\u0143</w>", "\u00f0\u0141\u0130\u012b</w>", "\u00f0\u0141\u013b\u0131</w>", "\u00f0\u0141\u013a\u012c</w>", "\u00f0\u0141\u0136\u00a5</w>", "\u00f0\u0141\u013a\u0123</w>", "\u00f0\u0141\u013a\u0127</w>", "\u00f0\u0141\u00a4\u0139</w>", "\u00f0\u0141\u013a\u0128</w>", "\u00f0\u0141\u0133\u0131</w>", "\u00e2\u013f\u00a4\u00ef\u00b8\u0131</w>", "\u00f0\u0141\u0134\u013e</w>", "\u00f0\u0141\u0134\u013c</w>", "\u00f0\u0141\u0134\u0139</w>", "\u00f0\u0141\u0134\u013b</w>", "\u00f0\u0141\u0138\u00a4</w>", "\u00f0\u0141\u013a\u0130</w>", "\u00f0\u0141\u0133\u012e</w>", "\u00f0\u0141\u00a5\u00b3</w>", "\u00f0\u0141\u0134\u00aa</w>", "\u00e2\u013e\u00a8</w>", "\u00f0\u0141\u0133\u012b</w>", "\u00f0\u0141\u0133\u0122</w>", "\u00f0\u0141\u0134\u00af</w>", "\u00f0\u0141\u0130\u012a</w>", "\u00f0\u0141\u013b\u012a</w>", "\u00f0\u0141\u013b\u012e</w>", "\u00f0\u0141\u0134\u0122</w>", "\u00f0\u0141\u0133\u0129</w>", "\u00f0\u0141\u0133\u012d</w>", "\u00e2\u013e\u0127</w>", "\u00f0\u0141\u0130\u0123</w>", "\u00f0\u0141\u012e\u0140</w>", "\u00f0\u0141\u012e\u00b8</w>", "\u00f0\u0141\u0134\u00b0</w>"],
+      ids: [49406, 1558, 4201, 9909, 1754, 3915, 3986, 5503, 3020, 3016, 4821, 9188, 10465, 10943, 4829, 1752, 4882, 6521, 6690, 4074, 10860, 4345, 4494, 28055, 6440, 3531, 3988, 5908, 7018, 14448, 9516, 4855, 12158, 7475, 17686, 5564, 13462, 12980, 10980, 14078, 49407],
+      decoded: "<|startoftext|>\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0 <|endoftext|>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u00e2\u013e\u00a8</w>", "\u00f0\u0141\u00a4\u0139</w>", "\u00f0\u0141\u0133\u0123", "\u00ef\u00b8\u0131</w>", "\u00f0\u0141\u0133", "\u00b1", "\u00f0\u0141\u0131\u00bb</w>", "\u00f0\u0141\u0137", "\u00b5", "\u00e2\u0122\u012f\u00e2\u013b\u0124\u00ef\u00b8\u0131</w>", "\u00f0\u0141\u00a7", "\u013b", "\u00f0\u0141\u0131\u00bb", "\u00e2\u0122\u012f\u00e2\u013b", "\u0124</w>", "\u00f0\u0141\u0133\u00a8", "\u00f0\u0141\u0131\u00bb\u00e2\u0122\u012f", "\u00f0\u0141\u012e\u00be</w>", "\u00f0\u0141\u00a7", "\u0133", "\u00e2\u0122\u012f", "\u00f0\u0141\u00a4", "\u013f", "\u00e2\u0122\u012f", "\u00f0\u0141\u00a7", "\u0133</w>", "\u00f0\u0141\u0133\u00a9\u00e2\u0122\u012f", "\u00e2\u013f\u00a4", "\u00e2\u0122\u012f", "\u00f0\u0141\u0134\u012d", "\u00e2\u0122\u012f", "\u00f0\u0141\u0133", "\u00a8</w>", "\u00f0\u0141\u0133\u00a9\u00e2\u0122\u012f", "\u00f0\u0141\u0133\u00a9\u00e2\u0122\u012f", "\u00f0\u0141\u0133\u00a7", "\u00e2\u0122\u012f", "\u00f0\u0141\u0133", "\u00a6</w>", "\u00f0\u0141\u00a7", "\u0133", "\u00f0\u0141\u0131\u00bb\u00e2\u0122\u012f", "\u00f0\u0141\u00a4", "\u013f", "\u00e2\u0122\u012f", "\u00f0\u0141\u00a7", "\u0133", "\u00f0\u0141\u0131\u00bb</w>", "\u00f0\u0141\u0131\u00b4", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00a2", "\u00f3", "\u0142", "\u0123", "\u00a5", "\u00f3", "\u0142", "\u0123", "\u00ae", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00bf</w>", "\u00f0\u0141\u0133\u00a8", "\u00f0\u0141\u0131\u00bb\u00e2\u0122\u012f", "\u00e2\u013f\u00a4\u00ef\u00b8\u0131", "\u00e2\u0122\u012f", "\u00f0\u0141\u0134\u012d", "\u00e2\u0122\u012f", "\u00f0\u0141\u0133\u00a8", "\u00f0\u0141\u0131\u00bc</w>"],
+      ids: [49406, 3531, 10465, 47796, 1001, 964, 109, 3702, 7692, 113, 10613, 8792, 247, 5042, 5177, 480, 18966, 46250, 39796, 8792, 239, 4244, 1793, 251, 4244, 8792, 495, 26304, 1266, 4244, 12217, 4244, 964, 357, 26304, 26304, 48938, 4244, 964, 355, 8792, 239, 46250, 1793, 251, 4244, 8792, 239, 3702, 39690, 175, 254, 223, 100, 175, 254, 223, 95, 175, 254, 223, 98, 175, 254, 223, 106, 175, 254, 223, 100, 175, 254, 223, 379, 18966, 46250, 2626, 4244, 12217, 4244, 18966, 4027, 49407],
+      decoded: "<|startoftext|>\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc <|endoftext|>",
+    },
+  },
+  "Xenova/owlvit-base-patch32": {
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["a</w>", "'ll</w>", "!", "!", "to</w>", "?'</w>", "d</w>", "''</w>", "d</w>", "of</w>", ",</w>", "can</w>", "'t</w>", ".</w>"],
+      ids: [49406, 320, 1342, 0, 0, 531, 13610, 323, 8445, 323, 539, 267, 753, 713, 269, 49407],
+      decoded: "<|startoftext|>a 'll!!to?' d '' d of, can 't. <|endoftext|>",
+    },
+  },
+};
diff --git a/tests/models/deberta-v2/tokenization.js b/tests/models/deberta-v2/tokenization.js
new file mode 100644
index 000000000..177502340
--- /dev/null
+++ b/tests/models/deberta-v2/tokenization.js
@@ -0,0 +1,304 @@
+import { DebertaV2Tokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, BERT_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = DebertaV2Tokenizer;
+export const TEST_CONFIG = {
+  "Xenova/nli-deberta-v3-small": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["\u2581How", "\u2581are", "\u2581you", "\u2581doing", "?"],
+      ids: [1, 577, 281, 274, 653, 302, 2],
+      decoded: "[CLS] How are you doing?[SEP]",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["\u2581You", "\u2581should", "'", "ve", "\u2581done", "\u2581this"],
+      ids: [1, 367, 403, 280, 415, 619, 291, 2],
+      decoded: "[CLS] You should've done this[SEP]",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["\u25810", "123456", "789", "\u25810", "\u25811", "\u25812", "\u25813", "\u25814", "\u25815", "\u25816", "\u25817", "\u25818", "\u25819", "\u258110", "\u2581100", "\u25811000"],
+      ids: [1, 767, 120304, 51535, 767, 376, 392, 404, 453, 456, 525, 574, 578, 712, 466, 803, 4985, 2],
+      decoded: "[CLS] 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000[SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u2581The", "\u2581company", "\u2581was", "\u2581founded", "\u2581in", "\u25812016", "."],
+      ids: [1, 279, 483, 284, 3679, 267, 892, 260, 2],
+      decoded: "[CLS] The company was founded in 2016.[SEP]",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581A", "\u2581'", "ll", "\u2581!", "!", "to", "?", "'", "d", "'", "'", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [1, 336, 382, 436, 1084, 300, 725, 302, 280, 407, 280, 280, 407, 265, 261, 295, 280, 297, 260, 2],
+      decoded: "[CLS] A 'll!!to?'d''d of, can't.[SEP]",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u2581def", "\u2581main", "(", ")", ":", "\u2581pass"],
+      ids: [1, 23097, 872, 555, 285, 294, 1633, 2],
+      decoded: "[CLS] def main(): pass[SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u2581let", "\u2581a", "\u2581=", "\u2581obj", ".", "to", "String", "(", ")", ";", "\u2581to", "String", "(", ")", ";"],
+      ids: [1, 678, 266, 1842, 68215, 260, 725, 29867, 555, 285, 346, 264, 29867, 555, 285, 346, 2],
+      decoded: "[CLS] let a = obj.toString(); toString();[SEP]",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["\u2581This", "\u2581is", "\u2581a", "\u2581test", "."],
+      ids: [1, 329, 269, 266, 1010, 260, 2],
+      decoded: "[CLS] This is a test.[SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u2581UN", "want", "\u00e9", "d", ",", "running"],
+      ids: [1, 4647, 27364, 5858, 407, 261, 15243, 2],
+      decoded: "[CLS] UNwant\u00e9d,running[SEP]",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["\u25811", "\u0000", "2", "\u25813"],
+      ids: [1, 376, 3, 445, 404, 2],
+      decoded: "[CLS] 1[UNK]2 3[SEP]",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["\u2581Hello", "\u2581World"],
+      ids: [1, 5365, 964, 2],
+      decoded: "[CLS] Hello World[SEP]",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["\u2581hello", "\u2581world"],
+      ids: [1, 12018, 447, 2],
+      decoded: "[CLS] hello world[SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581", "\u751f", "\u6d3b", "\u7684", "\u771f", "\u8c1b", "\u662f"],
+      ids: [1, 507, 41065, 101952, 9301, 98186, 3, 30060, 2],
+      decoded: "[CLS] \u751f\u6d3b\u7684\u771f[UNK]\u662f[SEP]",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581leading", "\u2581space"],
+      ids: [1, 1249, 754, 2],
+      decoded: "[CLS] leading space[SEP]",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trailing", "\u2581space"],
+      ids: [1, 18347, 754, 2],
+      decoded: "[CLS] trailing space[SEP]",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["\u2581Hi", "\u2581Hello"],
+      ids: [1, 2684, 5365, 2],
+      decoded: "[CLS] Hi Hello[SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$", "1", "\u2581R", "2", "\u2581#", "3", "\u2581\u20ac4", "\u2581\u00a35", "\u2581\u00a5", "6", "\u2581", "\u20a3", "7", "\u2581\u20b9", "8", "\u2581\u20b1", "9", "\u2581test"],
+      ids: [1, 1010, 419, 435, 909, 445, 953, 508, 56238, 14636, 56478, 765, 507, 3, 819, 34880, 804, 121499, 1088, 1010, 2],
+      decoded: "[CLS] test $1 R2 #3 \u20ac4 \u00a35 \u00a56 [UNK]7 \u20b98 \u20b19 test[SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581I", "\u2581bought", "\u2581an", "\u2581apple", "\u2581for", "\u2581$", "1", ".", "00", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [1, 273, 2031, 299, 6038, 270, 419, 435, 260, 962, 288, 262, 1106, 260, 2],
+      decoded: "[CLS] I bought an apple for $1.00 at the store.[SEP]",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", ".", ".", "."],
+      ids: [1, 274, 260, 260, 260, 2],
+      decoded: "[CLS] you...[SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", ".", ".", "."],
+      ids: [1, 274, 260, 260, 260, 2],
+      decoded: "[CLS] you...[SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", ".", ".", ".", "\u2581you", ".", ".", "."],
+      ids: [1, 274, 260, 260, 260, 274, 260, 260, 260, 2],
+      decoded: "[CLS] you... you...[SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581weird", "\u2581", "\uff5e", "\u2581edge", "\u2581", "\uff5e", "\u2581case"],
+      ids: [1, 4926, 507, 96622, 2363, 507, 96622, 571, 2],
+      decoded: "[CLS] weird \uff5e edge \uff5e case[SEP]",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581This", "\u2581is", "\u2581a", "\u2581test", "\u2581."],
+      ids: [1, 329, 269, 266, 1010, 323, 2],
+      decoded: "[CLS] This is a test.[SEP]",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581\ud83d\ude02", "\u2581", "\ud83d\udc4d", "\u2581", "\ud83e\udd23", "\u2581", "\ud83d\ude0d", "\u2581", "\ud83d\ude2d", "\u2581", "\ud83c\udf89", "\u2581", "\ud83d\ude4f", "\u2581\ud83d\ude0a", "\u2581\ud83d\udd25", "\u2581", "\ud83d\ude01", "\u2581", "\ud83d\ude05", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\ude06", "\u2581", "\ud83d\udc4f", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc9c", "\u2581", "\ud83d\udc9a", "\u2581", "\ud83d\udc97", "\u2581", "\ud83d\udc99", "\u2581", "\ud83d\udda4", "\u2581", "\ud83d\ude0e", "\u2581", "\ud83d\udc4c", "\u2581", "\ud83e\udd73", "\u2581", "\ud83d\udcaa", "\u2581", "\u2728", "\u2581", "\ud83d\udc49", "\u2581", "\ud83d\udc40", "\u2581", "\ud83d\udcaf", "\u2581", "\ud83c\udf88", "\u2581", "\ud83d\ude48", "\u2581", "\ud83d\ude4c", "\u2581", "\ud83d\udc80", "\u2581", "\ud83d\udc47", "\u2581", "\ud83d\udc4b", "\u2581\u2705", "\u2581", "\ud83c\udf81", "\u2581", "\ud83c\udf1e", "\u2581", "\ud83c\udf38", "\u2581", "\ud83d\udcb0"],
+      ids: [1, 97504, 507, 117545, 507, 123057, 507, 96353, 507, 123058, 507, 123169, 507, 121772, 109976, 115475, 507, 122874, 507, 124017, 507, 123983, 507, 123571, 507, 122632, 49509, 25377, 507, 123614, 507, 124105, 507, 124077, 507, 123384, 507, 124382, 507, 123340, 507, 123492, 507, 3, 507, 123306, 507, 110119, 507, 122633, 507, 123659, 507, 123765, 507, 125799, 507, 124322, 507, 122878, 507, 125843, 507, 124011, 507, 125021, 88523, 507, 124698, 507, 125612, 507, 123887, 507, 123979, 2],
+      decoded: "[CLS] \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c [UNK] \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0[SEP]",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "\u2728", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\udc41", "\ufe0f", "\u2581", "\ud83d\udc71", "\ud83c\udffb", "\u2581", "\ud83d\udd75", "\u2581", "\u2642", "\ufe0f", "\u2581", "\ud83e\uddd9", "\ud83c\udffb", "\u2581", "\u2642", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581", "\ud83c\udf3e", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83d\udc69", "\u2581\u2764", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc67", "\u2581", "\ud83d\udc66", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83c\udff4", "\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\ud83c\udffc"],
+      ids: [1, 507, 110119, 507, 123983, 507, 127294, 25377, 507, 3, 108391, 507, 3, 507, 117868, 25377, 507, 3, 108391, 507, 117868, 507, 125199, 108391, 507, 3, 507, 3, 507, 3, 507, 3, 507, 124709, 49509, 507, 124327, 507, 125199, 507, 124709, 507, 124709, 507, 126640, 507, 126853, 507, 3, 108391, 507, 3, 507, 3, 108391, 507, 126132, 3, 507, 125199, 108391, 49509, 25377, 507, 124327, 507, 125199, 118155, 2],
+      decoded: "[CLS] \u2728 \ud83e\udd17 \ud83d\udc41\ufe0f [UNK]\ud83c\udffb [UNK] \u2642\ufe0f [UNK]\ud83c\udffb \u2642 \ud83d\udc68\ud83c\udffb [UNK] [UNK] [UNK] [UNK] \ud83d\udc69 \u2764 \ud83d\udc8b \ud83d\udc68 \ud83d\udc69 \ud83d\udc69 \ud83d\udc67 \ud83d\udc66 [UNK]\ud83c\udffb [UNK] [UNK]\ud83c\udffb \ud83c\udff4[UNK] \ud83d\udc68\ud83c\udffb \u2764\ufe0f \ud83d\udc8b \ud83d\udc68\ud83c\udffc[SEP]",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["\u2581a", "h", "\u535a", "\u63a8", "zz"],
+      ids: [1, 266, 1537, 122598, 111743, 23260, 2],
+      decoded: "[CLS] ah\u535a\u63a8zz[SEP]",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["\u2581H\u00e9", "llo"],
+      ids: [1, 93519, 25341, 2],
+      decoded: "[CLS] H\u00e9llo[SEP]",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["\u2581He", "LL", "o", "!", "how", "\u2581Are", "\u2581yo", "U", "?"],
+      ids: [1, 383, 17145, 795, 300, 5608, 1396, 14469, 2628, 302, 2],
+      decoded: "[CLS] HeLLo!how Are yoU?[SEP]",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["\u2581H\u00e4", "LL", "o", "!", "how", "\u2581Are", "\u2581yo", "U", "?"],
+      ids: [1, 62693, 17145, 795, 300, 5608, 1396, 14469, 2628, 302, 2],
+      decoded: "[CLS] H\u00e4LLo!how Are yoU?[SEP]",
+    },
+  },
+  "Xenova/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["\u2581How", "\u2581are", "\u2581you", "\u2581do", "ing", "?"],
+      ids: [1, 5101, 419, 522, 343, 348, 292, 2],
+      decoded: "[CLS] How are you doing?[SEP]",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["\u2581", "0123456789", "\u25810", "\u25811", "\u25812", "\u25813", "\u25814", "\u25815", "\u25816", "\u25817", "\u25818", "\u25819", "\u258110", "\u2581100", "\u25811000"],
+      ids: [1, 260, 170160, 498, 334, 357, 382, 420, 431, 571, 618, 631, 775, 476, 967, 3884, 2],
+      decoded: "[CLS] 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000[SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u2581The", "\u2581company", "\u2581was", "\u2581found", "ed", "\u2581in", "\u25812016."],
+      ids: [1, 487, 5836, 640, 5898, 346, 282, 13792, 2],
+      decoded: "[CLS] The company was founded in 2016.[SEP]",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581A", "\u2581", "'", "ll", "\u2581", "!!", "to", "?", "'", "d", "''", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [1, 299, 260, 278, 1579, 260, 1524, 477, 292, 278, 286, 4461, 286, 305, 262, 739, 278, 271, 261, 2],
+      decoded: "[CLS] A 'll!!to?'d''d of, can't.[SEP]",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u2581de", "f", "\u2581main", "():", "\u2581pass"],
+      ids: [1, 270, 368, 4398, 78612, 4748, 2],
+      decoded: "[CLS] def main(): pass[SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u2581let", "\u2581", "a", "\u2581", "=", "\u2581obj", ".", "toString", "();", "\u2581", "toString", "();"],
+      ids: [1, 3257, 260, 263, 260, 350, 50670, 261, 64577, 1994, 260, 64577, 1994, 2],
+      decoded: "[CLS] let a = obj.toString(); toString();[SEP]",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["\u2581This", "\u2581is", "\u2581", "a", "\u2581test", "."],
+      ids: [1, 1495, 340, 260, 263, 2979, 261, 2],
+      decoded: "[CLS] This is a test.[SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u2581UN", "wan", "t\u00e9", "d", ",", "running"],
+      ids: [1, 10970, 3016, 3986, 286, 262, 170565, 2],
+      decoded: "[CLS] UNwant\u00e9d,running[SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581", "\u751f\u6d3b\u7684", "\u771f", "\u8c1b", "\u662f"],
+      ids: [1, 260, 197263, 7275, 241962, 1544, 2],
+      decoded: "[CLS] \u751f\u6d3b\u7684\u771f\u8c1b\u662f[SEP]",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581", "leading", "\u2581space"],
+      ids: [1, 260, 22120, 11496, 2],
+      decoded: "[CLS] leading space[SEP]",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trail", "ing", "\u2581space"],
+      ids: [1, 66699, 348, 11496, 2],
+      decoded: "[CLS] trailing space[SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$1", "\u2581R", "2", "\u2581#3", "\u2581\u20ac4", "\u2581\u00a35", "\u2581\u00a5", "6", "\u2581", "\u20a3", "7", "\u2581\u20b9", "8", "\u2581", "\u20b1", "9", "\u2581test"],
+      ids: [1, 2979, 21793, 532, 339, 19403, 157186, 156260, 33481, 452, 260, 242687, 488, 39568, 450, 260, 211232, 496, 2979, 2],
+      decoded: "[CLS] test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test[SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581I", "\u2581b", "ought", "\u2581an", "\u2581apple", "\u2581for", "\u2581$", "1.00", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [1, 337, 331, 22280, 462, 44791, 333, 1161, 42645, 345, 288, 5318, 261, 2],
+      decoded: "[CLS] I bought an apple for $1.00 at the store.[SEP]",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "..."],
+      ids: [1, 522, 303, 2],
+      decoded: "[CLS] you...[SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "..."],
+      ids: [1, 522, 303, 2],
+      decoded: "[CLS] you...[SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "...", "\u2581you", "..."],
+      ids: [1, 522, 303, 522, 303, 2],
+      decoded: "[CLS] you... you...[SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581w", "eird", "\u2581", "\uff5e", "\u2581edge", "\u2581", "\uff5e", "\u2581case"],
+      ids: [1, 415, 116640, 260, 2790, 53876, 260, 2790, 4073, 2],
+      decoded: "[CLS] weird \uff5e edge \uff5e case[SEP]",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581This", "\u2581is", "\u2581", "a", "\u2581test", "\u2581", "."],
+      ids: [1, 1495, 340, 260, 263, 2979, 260, 261, 2],
+      decoded: "[CLS] This is a test.[SEP]",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581", "\ud83d\ude02", "\u2581", "\ud83d\udc4d", "\u2581", "\ud83e\udd23", "\u2581", "\ud83d\ude0d", "\u2581", "\ud83d\ude2d", "\u2581", "\ud83c\udf89", "\u2581", "\ud83d\ude4f", "\u2581", "\ud83d\ude0a", "\u2581", "\ud83d\udd25", "\u2581", "\ud83d\ude01", "\u2581", "\ud83d\ude05", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\ude06", "\u2581", "\ud83d\udc4f", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc9c", "\u2581", "\ud83d\udc9a", "\u2581", "\ud83d\udc97", "\u2581", "\ud83d\udc99", "\u2581", "\ud83d\udda4", "\u2581", "\ud83d\ude0e", "\u2581", "\ud83d\udc4c", "\u2581", "\ud83e\udd73", "\u2581", "\ud83d\udcaa", "\u2581", "\u2728", "\u2581\ud83d\udc49", "\u2581", "\ud83d\udc40", "\u2581", "\ud83d\udcaf", "\u2581", "\ud83c\udf88", "\u2581", "\ud83d\ude48", "\u2581", "\ud83d\ude4c", "\u2581", "\ud83d\udc80", "\u2581", "\ud83d\udc47", "\u2581", "\ud83d\udc4b", "\u2581\u2705", "\u2581", "\ud83c\udf81", "\u2581", "\ud83c\udf1e", "\u2581", "\ud83c\udf38", "\u2581", "\ud83d\udcb0"],
+      ids: [1, 260, 116844, 260, 72330, 260, 160951, 260, 78796, 260, 180546, 260, 212774, 260, 102930, 260, 71509, 260, 96089, 260, 137652, 260, 194608, 260, 182033, 260, 164467, 260, 149267, 56787, 4668, 260, 210251, 260, 195202, 260, 178523, 260, 167604, 260, 236081, 260, 157800, 260, 162843, 260, 242580, 260, 174590, 260, 65271, 113700, 260, 239652, 260, 237474, 260, 240937, 260, 239131, 260, 216701, 260, 242618, 260, 133395, 260, 240645, 82147, 260, 49599, 260, 239888, 260, 152102, 260, 239168, 2],
+      decoded: "[CLS] \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0[SEP]",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "\u2728", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\udc41", "\ufe0f", "\u2581", "\ud83d\udc71", "\ud83c\udffb", "\u2581", "\ud83d\udd75", "\u2581", "\u2642", "\ufe0f", "\u2581", "\ud83e\uddd9", "\ud83c\udffb", "\u2581", "\u2642", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581", "\ud83c\udf3e", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83d\udc69", "\u2581\u2764", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc67", "\u2581", "\ud83d\udc66", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83c\udff4", "\udb40\udc67", "\udb40\udc62", "\udb40\udc65", "\udb40\udc6e", "\udb40\udc67", "\udb40\udc7f", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\ud83c\udffc"],
+      ids: [1, 260, 65271, 260, 182033, 260, 16307, 4668, 260, 244774, 75846, 260, 247133, 260, 50622, 4668, 260, 3, 75846, 260, 50622, 260, 239432, 75846, 260, 243052, 260, 244250, 260, 243394, 260, 244250, 260, 239098, 56787, 260, 223802, 260, 239432, 260, 239098, 260, 239098, 260, 241727, 260, 242446, 260, 244250, 75846, 260, 243394, 260, 244250, 75846, 260, 244177, 245994, 247023, 248837, 248531, 245994, 245953, 260, 239432, 75846, 56787, 4668, 260, 223802, 260, 239432, 159667, 2],
+      decoded: "[CLS] \u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75 \u2642\ufe0f [UNK]\ud83c\udffb \u2642 \ud83d\udc68\ud83c\udffb \ud83c\udf3e \ud83e\uddd1 \ud83e\udd1d \ud83e\uddd1 \ud83d\udc69 \u2764 \ud83d\udc8b \ud83d\udc68 \ud83d\udc69 \ud83d\udc69 \ud83d\udc67 \ud83d\udc66 \ud83e\uddd1\ud83c\udffb \ud83e\udd1d \ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb \u2764\ufe0f \ud83d\udc8b \ud83d\udc68\ud83c\udffc[SEP]",
+    },
+  },
+};
diff --git a/tests/models/distilbert/tokenization.js b/tests/models/distilbert/tokenization.js
new file mode 100644
index 000000000..5fc1f3b93
--- /dev/null
+++ b/tests/models/distilbert/tokenization.js
@@ -0,0 +1,306 @@
+import { DistilBertTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = DistilBertTokenizer;
+export const TEST_CONFIG = {
+  "Xenova/distilbert-base-cased-distilled-squad": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "are", "you", "doing", "?"],
+      ids: [101, 1731, 1132, 1128, 1833, 136, 102],
+      decoded: "[CLS] How are you doing? [SEP]",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "should", "'", "ve", "done", "this"],
+      ids: [101, 1192, 1431, 112, 1396, 1694, 1142, 102],
+      decoded: "[CLS] You should've done this [SEP]",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["01", "##23", "##45", "##6", "##7", "##8", "##9", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "100", "1000"],
+      ids: [101, 5187, 22737, 21336, 1545, 1559, 1604, 1580, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 1275, 1620, 6087, 102],
+      decoded: "[CLS] 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000 [SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "company", "was", "founded", "in", "2016", "."],
+      ids: [101, 1109, 1419, 1108, 1771, 1107, 1446, 119, 102],
+      decoded: "[CLS] The company was founded in 2016. [SEP]",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "'", "ll", "!", "!", "to", "?", "'", "d", "'", "'", "d", "of", ",", "can", "'", "t", "."],
+      ids: [101, 138, 112, 1325, 106, 106, 1106, 136, 112, 173, 112, 112, 173, 1104, 117, 1169, 112, 189, 119, 102],
+      decoded: "[CLS] A'll!! to?'d'' d of, can't. [SEP]",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "main", "(", ")", ":", "pass"],
+      ids: [101, 19353, 1514, 113, 114, 131, 2789, 102],
+      decoded: "[CLS] def main ( ) : pass [SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "o", "##b", "##j", ".", "to", "##S", "##tring", "(", ")", ";", "to", "##S", "##tring", "(", ")", ";"],
+      ids: [101, 1519, 170, 134, 184, 1830, 3361, 119, 1106, 1708, 28108, 113, 114, 132, 1106, 1708, 28108, 113, 114, 132, 102],
+      decoded: "[CLS] let a = obj. toString ( ) ; toString ( ) ; [SEP]",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["This", "is", "a", "test", "."],
+      ids: [101, 1188, 1110, 170, 2774, 119, 102],
+      decoded: "[CLS] This is a test. [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "##wan", "##t\u00e9", "##d", ",", "running"],
+      ids: [101, 7414, 5491, 14608, 1181, 117, 1919, 102],
+      decoded: "[CLS] UNwant\u00e9d, running [SEP]",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["123"],
+      ids: [101, 13414, 102],
+      decoded: "[CLS] 123 [SEP]",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "World"],
+      ids: [101, 8667, 1291, 102],
+      decoded: "[CLS] Hello World [SEP]",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "world"],
+      ids: [101, 19082, 1362, 102],
+      decoded: "[CLS] hello world [SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "[UNK]", "[UNK]", "\u771f", "[UNK]", "[UNK]"],
+      ids: [101, 1056, 100, 100, 1061, 100, 100, 102],
+      decoded: "[CLS] \u751f [UNK] [UNK] \u771f [UNK] [UNK] [SEP]",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["leading", "space"],
+      ids: [101, 2020, 2000, 102],
+      decoded: "[CLS] leading space [SEP]",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trailing", "space"],
+      ids: [101, 13161, 2000, 102],
+      decoded: "[CLS] trailing space [SEP]",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "Hello"],
+      ids: [101, 8790, 8667, 102],
+      decoded: "[CLS] Hi Hello [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "R", "##2", "#", "3", "\u20ac", "##4", "\u00a3", "##5", "\u00a5", "##6", "[UNK]", "\u20b9", "##8", "\u20b1", "##9", "test"],
+      ids: [101, 2774, 109, 122, 155, 1477, 108, 124, 836, 1527, 202, 1571, 203, 1545, 100, 838, 1604, 837, 1580, 2774, 102],
+      decoded: "[CLS] test $ 1 R2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] \u20b98 \u20b19 test [SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "bought", "an", "apple", "for", "$", "1", ".", "00", "at", "the", "store", "."],
+      ids: [101, 146, 3306, 1126, 12075, 1111, 109, 122, 119, 3135, 1120, 1103, 2984, 119, 102],
+      decoded: "[CLS] I bought an apple for $ 1. 00 at the store. [SEP]",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u2026"],
+      ids: [101, 1128, 795, 102],
+      decoded: "[CLS] you \u2026 [SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u2026"],
+      ids: [101, 1128, 795, 102],
+      decoded: "[CLS] you \u2026 [SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u2026", "you", "\u2026"],
+      ids: [101, 1128, 795, 1128, 795, 102],
+      decoded: "[CLS] you \u2026 you \u2026 [SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["weird", "[UNK]", "edge", "[UNK]", "case"],
+      ids: [101, 6994, 100, 2652, 100, 1692, 102],
+      decoded: "[CLS] weird [UNK] edge [UNK] case [SEP]",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "."],
+      ids: [101, 100, 100, 100, 100, 100, 119, 102],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK] [UNK]. [SEP]",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [101, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 102],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [101, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 102],
+      decoded: "[CLS] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [SEP]",
+    },
+  },
+  "Xenova/distilbert-base-uncased-finetuned-sst-2-english": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["how", "are", "you", "doing", "?"],
+      ids: [101, 2129, 2024, 2017, 2725, 1029, 102],
+      decoded: "[CLS] how are you doing? [SEP]",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["you", "should", "'", "ve", "done", "this"],
+      ids: [101, 2017, 2323, 1005, 2310, 2589, 2023, 102],
+      decoded: "[CLS] you should've done this [SEP]",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["the", "company", "was", "founded", "in", "2016", "."],
+      ids: [101, 1996, 2194, 2001, 2631, 1999, 2355, 1012, 102],
+      decoded: "[CLS] the company was founded in 2016. [SEP]",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["a", "'", "ll", "!", "!", "to", "?", "'", "d", "'", "'", "d", "of", ",", "can", "'", "t", "."],
+      ids: [101, 1037, 1005, 2222, 999, 999, 2000, 1029, 1005, 1040, 1005, 1005, 1040, 1997, 1010, 2064, 1005, 1056, 1012, 102],
+      decoded: "[CLS] a'll!! to?'d'' d of, can't. [SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "ob", "##j", ".", "to", "##st", "##ring", "(", ")", ";", "to", "##st", "##ring", "(", ")", ";"],
+      ids: [101, 2292, 1037, 1027, 27885, 3501, 1012, 2000, 3367, 4892, 1006, 1007, 1025, 2000, 3367, 4892, 1006, 1007, 1025, 102],
+      decoded: "[CLS] let a = obj. tostring ( ) ; tostring ( ) ; [SEP]",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["this", "is", "a", "test", "."],
+      ids: [101, 2023, 2003, 1037, 3231, 1012, 102],
+      decoded: "[CLS] this is a test. [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["unwanted", ",", "running"],
+      ids: [101, 18162, 1010, 2770, 102],
+      decoded: "[CLS] unwanted, running [SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "[UNK]", "\u7684", "\u771f", "[UNK]", "[UNK]"],
+      ids: [101, 1910, 100, 1916, 1921, 100, 100, 102],
+      decoded: "[CLS] \u751f [UNK] \u7684 \u771f [UNK] [UNK] [SEP]",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["hi", "hello"],
+      ids: [101, 7632, 7592, 102],
+      decoded: "[CLS] hi hello [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r", "##2", "#", "3", "\u20ac", "##4", "\u00a35", "\u00a5", "##6", "[UNK]", "\u20b9", "##8", "\u20b1", "##9", "test"],
+      ids: [101, 3231, 1002, 1015, 1054, 2475, 1001, 1017, 1574, 2549, 27813, 1071, 2575, 100, 1576, 2620, 1575, 2683, 3231, 102],
+      decoded: "[CLS] test $ 1 r2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] \u20b98 \u20b19 test [SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["i", "bought", "an", "apple", "for", "$", "1", ".", "00", "at", "the", "store", "."],
+      ids: [101, 1045, 4149, 2019, 6207, 2005, 1002, 1015, 1012, 4002, 2012, 1996, 3573, 1012, 102],
+      decoded: "[CLS] i bought an apple for $ 1. 00 at the store. [SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["weird", "\uff5e", "edge", "\uff5e", "case"],
+      ids: [101, 6881, 1995, 3341, 1995, 2553, 102],
+      decoded: "[CLS] weird \uff5e edge \uff5e case [SEP]",
+    },
+  },
+  "Xenova/distiluse-base-multilingual-cased-v2": {
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["012", "##34", "##5", "##6", "##7", "##8", "##9", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "100", "1000"],
+      ids: [101, 69878, 78301, 11166, 11211, 11305, 11396, 11373, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 10150, 10407, 12186, 102],
+      decoded: "[CLS] 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000 [SEP]",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "ob", "##j", ".", "to", "##S", "##trin", "##g", "(", ")", ";", "to", "##S", "##trin", "##g", "(", ")", ";"],
+      ids: [101, 13595, 169, 134, 17339, 10418, 119, 10114, 10731, 109163, 10240, 113, 114, 132, 10114, 10731, 109163, 10240, 113, 114, 132, 102],
+      decoded: "[CLS] let a = obj. toString ( ) ; toString ( ) ; [SEP]",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "##want", "##\u00e9d", ",", "running"],
+      ids: [101, 26578, 104216, 84193, 117, 18020, 102],
+      decoded: "[CLS] UNwant\u00e9d, running [SEP]",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hell", "##o", "world"],
+      ids: [101, 61694, 10133, 11356, 102],
+      decoded: "[CLS] hello world [SEP]",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "\u6d3b", "\u7684", "\u771f", "\u8c1b", "\u662f"],
+      ids: [101, 5600, 4978, 5718, 5769, 7378, 4380, 102],
+      decoded: "[CLS] \u751f \u6d3b \u7684 \u771f \u8c1b \u662f [SEP]",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trail", "##ing", "space"],
+      ids: [101, 56559, 10230, 16199, 102],
+      decoded: "[CLS] trailing space [SEP]",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "R2", "#", "3", "\u20ac", "##4", "\u00a3", "##5", "\u00a5", "##6", "[UNK]", "\u20b9", "##8", "[UNK]", "test"],
+      ids: [101, 15839, 109, 122, 94000, 108, 124, 1775, 11011, 201, 11166, 202, 11211, 100, 1776, 11396, 100, 15839, 102],
+      decoded: "[CLS] test $ 1 R2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] \u20b98 [UNK] test [SEP]",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "bought", "an", "app", "##le", "for", "$", "1", ".", "00", "at", "the", "store", "."],
+      ids: [101, 146, 28870, 10151, 72894, 10284, 10142, 109, 122, 119, 11025, 10160, 10105, 13708, 119, 102],
+      decoded: "[CLS] I bought an apple for $ 1. 00 at the store. [SEP]",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "[UNK]"],
+      ids: [101, 13028, 100, 102],
+      decoded: "[CLS] you [UNK] [SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "[UNK]"],
+      ids: [101, 13028, 100, 102],
+      decoded: "[CLS] you [UNK] [SEP]",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "[UNK]", "you", "[UNK]"],
+      ids: [101, 13028, 100, 13028, 100, 102],
+      decoded: "[CLS] you [UNK] you [UNK] [SEP]",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["wei", "##rd", "\uff5e", "edge", "\uff5e", "case"],
+      ids: [101, 86981, 12023, 10096, 30599, 10096, 13474, 102],
+      decoded: "[CLS] weird \uff5e edge \uff5e case [SEP]",
+    },
+  },
+};
diff --git a/tests/models/esm/tokenization.js b/tests/models/esm/tokenization.js
new file mode 100644
index 000000000..c072d5251
--- /dev/null
+++ b/tests/models/esm/tokenization.js
@@ -0,0 +1,322 @@
+import { EsmTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, ESM_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = EsmTokenizer;
+export const TEST_CONFIG = {
+  "Xenova/nucleotide-transformer-500m-human-ref": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      // "tokens": ["How", "are", "you", "doing?"],
+      ids: [3, 0, 0, 0, 0],
+      decoded: "<cls> <unk> <unk> <unk> <unk>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      // "tokens": ["You", "should've", "done", "this"],
+      ids: [3, 0, 0, 0, 0],
+      decoded: "<cls> <unk> <unk> <unk> <unk>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      // "tokens": ["0123456789", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "100", "1000"],
+      ids: [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+      decoded: "<cls> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      // "tokens": ["T", "he", "company", "was", "founded", "in", "2016."],
+      ids: [3, 4101, 0, 0, 0, 0, 0, 0],
+      decoded: "<cls> T <unk> <unk> <unk> <unk> <unk> <unk>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      // "tokens": ["A", "'ll", "!!to?'d''d", "of,", "can't."],
+      ids: [3, 4100, 0, 0, 0, 0],
+      decoded: "<cls> A <unk> <unk> <unk> <unk>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      // "tokens": ["def", "main():", "pass"],
+      ids: [3, 0, 0, 0],
+      decoded: "<cls> <unk> <unk> <unk>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      // "tokens": ["let", "a", "=", "obj.toString();", "toString();"],
+      ids: [3, 0, 0, 0, 0, 0],
+      decoded: "<cls> <unk> <unk> <unk> <unk> <unk>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      // "tokens": ["T", "his", "is", "a", "test."],
+      ids: [3, 4101, 0, 0, 0, 0],
+      decoded: "<cls> T <unk> <unk> <unk> <unk>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      // "tokens": ["U", "N", "want\u00e9d,running"],
+      ids: [3, 0, 4104, 0],
+      decoded: "<cls> <unk> N <unk>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      // "tokens": ["1\u00002\ufffd3"],
+      ids: [3, 0],
+      decoded: "<cls> <unk>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      // "tokens": ["Hello", "World"],
+      ids: [3, 0, 0],
+      decoded: "<cls> <unk> <unk>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      // "tokens": ["hello", "world"],
+      ids: [3, 0, 0],
+      decoded: "<cls> <unk> <unk>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      // "tokens": ["\u751f\u6d3b\u7684\u771f\u8c1b\u662f"],
+      ids: [3, 0],
+      decoded: "<cls> <unk>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      // "tokens": ["leading", "space"],
+      ids: [3, 0, 0],
+      decoded: "<cls> <unk> <unk>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      // "tokens": ["trailing", "space"],
+      ids: [3, 0, 0],
+      decoded: "<cls> <unk> <unk>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      // "tokens": ["Hi", "Hello"],
+      ids: [3, 0, 0],
+      decoded: "<cls> <unk> <unk>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      // "tokens": ["test", "$1", "R2", "#3", "\u20ac4", "\u00a35", "\u00a56", "\u20a37", "\u20b98", "\u20b19", "test"],
+      ids: [3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+      decoded: "<cls> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      // "tokens": ["I", "bought", "an", "apple", "for", "$1.00", "at", "the", "store."],
+      ids: [3, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+      decoded: "<cls> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      // "tokens": ["you\u2026"],
+      ids: [3, 0],
+      decoded: "<cls> <unk>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      // "tokens": ["you\u2026"],
+      ids: [3, 0],
+      decoded: "<cls> <unk>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      // "tokens": ["you\u2026", "you\u2026"],
+      ids: [3, 0, 0],
+      decoded: "<cls> <unk> <unk>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      // "tokens": ["weird", "\uff5e", "edge", "\uff5e", "case"],
+      ids: [3, 0, 0, 0, 0, 0],
+      decoded: "<cls> <unk> <unk> <unk> <unk> <unk>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      // "tokens": ["\u2581", "T", "his", "\u2581is", "\u2581a", "\u2581test", "\u2581."],
+      ids: [3, 0, 4101, 0, 0, 0, 0, 0],
+      decoded: "<cls> <unk> T <unk> <unk> <unk> <unk> <unk>",
+    },
+    SPECIAL_TOKENS: {
+      text: ESM_TEST_STRINGS.SPECIAL_TOKENS,
+      tokens: ["<unk>", "<pad>", "<mask>", "<cls>", "<eos>", "<bos>"],
+      ids: [3, 0, 1, 2, 3, 4105, 4106],
+      decoded: "<cls> <unk> <pad> <mask> <cls> <eos> <bos>",
+    },
+    PROTEIN_SEQUENCES_1: {
+      text: ESM_TEST_STRINGS.PROTEIN_SEQUENCES_1,
+      tokens: ["ATTCCG", "ATTCCG", "ATTCCG"],
+      ids: [3, 367, 367, 367],
+      decoded: "<cls> ATTCCG ATTCCG ATTCCG",
+    },
+    PROTEIN_SEQUENCES_2: {
+      text: ESM_TEST_STRINGS.PROTEIN_SEQUENCES_2,
+      tokens: ["ATTTCT", "CTCTCT", "CTCTGA", "GATCGA", "TCGATC", "G", "A", "T"],
+      ids: [3, 349, 2461, 2464, 3184, 1738, 4103, 4100, 4101],
+      decoded: "<cls> ATTTCT CTCTCT CTCTGA GATCGA TCGATC G A T",
+    },
+  },
+  "Xenova/esm2_t12_35M_UR50D": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      // "tokens": ["H", "ow", "are", "you", "doing?"],
+      ids: [0, 21, 3, 3, 3, 3, 2],
+      decoded: "<cls> H <unk> <unk> <unk> <unk> <eos>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      // "tokens": ["Y", "ou", "should've", "done", "this"],
+      ids: [0, 19, 3, 3, 3, 3, 2],
+      decoded: "<cls> Y <unk> <unk> <unk> <unk> <eos>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      // "tokens": ["0123456789", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "100", "1000"],
+      ids: [0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2],
+      decoded: "<cls> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <eos>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      // "tokens": ["T", "he", "company", "was", "founded", "in", "2016", "."],
+      ids: [0, 11, 3, 3, 3, 3, 3, 3, 29, 2],
+      decoded: "<cls> T <unk> <unk> <unk> <unk> <unk> <unk>. <eos>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      // "tokens": ["A", "'ll", "!!to?'d''d", "of,", "can't", "."],
+      ids: [0, 5, 3, 3, 3, 3, 29, 2],
+      decoded: "<cls> A <unk> <unk> <unk> <unk>. <eos>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      // "tokens": ["def", "main():", "pass"],
+      ids: [0, 3, 3, 3, 2],
+      decoded: "<cls> <unk> <unk> <unk> <eos>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      // "tokens": ["let", "a", "=", "obj", ".", "to", "S", "tring();", "to", "S", "tring();"],
+      ids: [0, 3, 3, 3, 3, 29, 3, 8, 3, 3, 8, 3, 2],
+      decoded: "<cls> <unk> <unk> <unk> <unk>. <unk> S <unk> <unk> S <unk> <eos>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      // "tokens": ["T", "his", "is", "a", "test", "."],
+      ids: [0, 11, 3, 3, 3, 3, 29, 2],
+      decoded: "<cls> T <unk> <unk> <unk> <unk>. <eos>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      // "tokens": ["U", "N", "want\u00e9d,running"],
+      ids: [0, 26, 17, 3, 2],
+      decoded: "<cls> U N <unk> <eos>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      // "tokens": ["1\u00002\ufffd3"],
+      ids: [0, 3, 2],
+      decoded: "<cls> <unk> <eos>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      // "tokens": ["H", "ello", "W", "orld"],
+      ids: [0, 21, 3, 22, 3, 2],
+      decoded: "<cls> H <unk> W <unk> <eos>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      // "tokens": ["hello", "world"],
+      ids: [0, 3, 3, 2],
+      decoded: "<cls> <unk> <unk> <eos>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      // "tokens": ["\u751f\u6d3b\u7684\u771f\u8c1b\u662f"],
+      ids: [0, 3, 2],
+      decoded: "<cls> <unk> <eos>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      // "tokens": ["leading", "space"],
+      ids: [0, 3, 3, 2],
+      decoded: "<cls> <unk> <unk> <eos>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      // "tokens": ["trailing", "space"],
+      ids: [0, 3, 3, 2],
+      decoded: "<cls> <unk> <unk> <eos>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      // "tokens": ["H", "i", "H", "ello"],
+      ids: [0, 21, 3, 21, 3, 2],
+      decoded: "<cls> H <unk> H <unk> <eos>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      // "tokens": ["test", "$1", "R", "2", "#3", "\u20ac4", "\u00a35", "\u00a56", "\u20a37", "\u20b98", "\u20b19", "test"],
+      ids: [0, 3, 3, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2],
+      decoded: "<cls> <unk> <unk> R <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <eos>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      // "tokens": ["I", "bought", "an", "apple", "for", "$1", ".", "00", "at", "the", "store", "."],
+      ids: [0, 12, 3, 3, 3, 3, 3, 29, 3, 3, 3, 3, 29, 2],
+      decoded: "<cls> I <unk> <unk> <unk> <unk> <unk>. <unk> <unk> <unk> <unk>. <eos>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      // "tokens": ["you\u2026"],
+      ids: [0, 3, 2],
+      decoded: "<cls> <unk> <eos>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      // "tokens": ["you\u2026"],
+      ids: [0, 3, 2],
+      decoded: "<cls> <unk> <eos>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      // "tokens": ["you\u2026", "you\u2026"],
+      ids: [0, 3, 3, 2],
+      decoded: "<cls> <unk> <unk> <eos>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      // "tokens": ["weird", "\uff5e", "edge", "\uff5e", "case"],
+      ids: [0, 3, 3, 3, 3, 3, 2],
+      decoded: "<cls> <unk> <unk> <unk> <unk> <unk> <eos>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      // "tokens": ["\u2581", "T", "his", "\u2581is", "\u2581a", "\u2581test", "\u2581", "."],
+      ids: [0, 3, 11, 3, 3, 3, 3, 3, 29, 2],
+      decoded: "<cls> <unk> T <unk> <unk> <unk> <unk> <unk>. <eos>",
+    },
+    SPECIAL_TOKENS: {
+      text: ESM_TEST_STRINGS.SPECIAL_TOKENS,
+      // "tokens": ["<unk>", "<pad>", "<mask>", "<cls>", "<eos>", "<bos>"],
+      ids: [0, 3, 1, 32, 0, 2, 3, 2],
+      decoded: "<cls> <unk> <pad> <mask> <cls> <eos> <unk> <eos>",
+    },
+    PROTEIN_SEQUENCES_1: {
+      text: ESM_TEST_STRINGS.PROTEIN_SEQUENCES_1,
+      tokens: ["A", "T", "T", "C", "C", "G", "A", "T", "T", "C", "C", "G", "A", "T", "T", "C", "C", "G"],
+      ids: [0, 5, 11, 11, 23, 23, 6, 5, 11, 11, 23, 23, 6, 5, 11, 11, 23, 23, 6, 2],
+      decoded: "<cls> A T T C C G A T T C C G A T T C C G <eos>",
+    },
+    PROTEIN_SEQUENCES_2: {
+      text: ESM_TEST_STRINGS.PROTEIN_SEQUENCES_2,
+      tokens: ["A", "T", "T", "T", "C", "T", "C", "T", "C", "T", "C", "T", "C", "T", "C", "T", "G", "A", "G", "A", "T", "C", "G", "A", "T", "C", "G", "A", "T", "C", "G", "A", "T"],
+      ids: [0, 5, 11, 11, 11, 23, 11, 23, 11, 23, 11, 23, 11, 23, 11, 23, 11, 6, 5, 6, 5, 11, 23, 6, 5, 11, 23, 6, 5, 11, 23, 6, 5, 11, 2],
+      decoded: "<cls> A T T T C T C T C T C T C T C T G A G A T C G A T C G A T C G A T <eos>",
+    },
+  },
+};
diff --git a/tests/models/falcon/tokenization.js b/tests/models/falcon/tokenization.js
new file mode 100644
index 000000000..9dc5827ce
--- /dev/null
+++ b/tests/models/falcon/tokenization.js
@@ -0,0 +1,244 @@
+import { FalconTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, FALCON_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = FalconTokenizer;
+export const TEST_CONFIG = {
+  "tiiuae/falcon-7b": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u0120are", "\u0120you", "\u0120doing", "?"],
+      ids: [1830, 362, 299, 1836, 42],
+      decoded: "How are you doing?",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120should", "'", "ve", "\u0120done", "\u0120this"],
+      ids: [1357, 808, 18, 298, 1782, 414],
+      decoded: "You should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["012", "345", "678", "9", "\u0120", "0", "\u0120", "1", "\u0120", "2", "\u0120", "3", "\u0120", "4", "\u0120", "5", "\u0120", "6", "\u0120", "7", "\u0120", "8", "\u0120", "9", "\u0120", "10", "\u0120", "100", "\u0120", "100", "0"],
+      ids: [24445, 29094, 41583, 36, 204, 27, 204, 28, 204, 29, 204, 30, 204, 31, 204, 32, 204, 33, 204, 34, 204, 35, 204, 36, 204, 696, 204, 1425, 204, 1425, 27],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u0120", "201", "6", "."],
+      ids: [487, 1438, 398, 9923, 272, 204, 626, 33, 25],
+      decoded: "The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'", "ll", "\u0120", "!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'", "t", "."],
+      ids: [44, 193, 18, 567, 204, 1409, 534, 12493, 79, 7544, 79, 275, 23, 418, 18, 95, 25],
+      decoded: "A\n'll!!to?'d''d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "():", "\u010a", "\u0109", "pass"],
+      ids: [3071, 1316, 13160, 193, 192, 5412],
+      decoded: "def main():\n\tpass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120", "=", "\u0120obj", ".", "toString", "();", "\u010a", "toString", "();"],
+      ids: [1025, 241, 204, 40, 13756, 25, 19409, 2032, 193, 19409, 2032],
+      decoded: "let a = obj.toString();\ntoString();",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["This", "\u010a", "\u010a", "is", "\u010a", "a", "\u010a", "test", "."],
+      ids: [1182, 193, 193, 259, 193, 76, 193, 4780, 25],
+      decoded: "This\n\nis\na\ntest.",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9d", ",", "running"],
+      ids: [4000, 32108, 5706, 23, 27386],
+      decoded: "UNwant\u00e9d,running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0100", "2", "\u00ef\u00bf", "\u00bd", "3"],
+      ids: [28, 186, 29, 13112, 133, 30],
+      decoded: "1\u00002\ufffd3",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "\u0120World"],
+      ids: [9856, 2889],
+      decoded: "Hello World",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "\u0120world"],
+      ids: [30835, 1079],
+      decoded: "hello world",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141\u00e6\u00b4\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [32725, 1105, 15498, 8061, 233, 2364],
+      decoded: "\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120\u0120", "\u0120leading", "\u0120space"],
+      ids: [258, 3736, 2151],
+      decoded: "   leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "iling", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [9172, 4447, 2151, 466],
+      decoded: "trailing space   ",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u0120", "\u0120Hello"],
+      ids: [5516, 204, 23090],
+      decoded: "Hi  Hello",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120", "$", "1", "\u0120R", "2", "\u0120", "#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2", "\u00a5", "6", "\u0120\u00e2\u0124", "\u00a3", "7", "\u0120\u00e2\u0124", "\u00b9", "8", "\u0120\u00e2\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [4780, 204, 15, 28, 382, 29, 204, 14, 30, 6471, 31, 5131, 32, 3068, 110, 33, 25631, 108, 34, 25631, 129, 35, 25631, 121, 36, 1318],
+      decoded: "test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120", "$", "1", ".", "00", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [52, 5659, 267, 12381, 312, 204, 15, 28, 25, 527, 388, 248, 2946, 25],
+      decoded: "I bought an apple for $1.00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120\u0120"],
+      ids: [5667, 898, 258],
+      decoded: "you\u2026  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [5667, 898, 60482],
+      decoded: "you\u2026\u00a0\u00a0",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142", "\u00c2\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [5667, 898, 4381, 4381, 5667, 898, 60482],
+      decoded: "you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120", "\u00ef", "\u00bd", "\u0140", "\u0120edge", "\u0120", "\u00ef", "\u00bd", "\u0140", "\u0120case"],
+      ids: [698, 1505, 204, 181, 133, 236, 5753, 204, 181, 133, 236, 1494],
+      decoded: "weird \uff5e edge \uff5e case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120\u00e2\u0138", "\u0123", "is", "\u0120\u00e2\u0138", "\u0123", "a", "\u0120\u00e2\u0138", "\u0123", "test", "\u0120\u00e2\u0138", "\u0123", "."],
+      ids: [13856, 207, 1182, 26607, 207, 259, 26607, 207, 76, 26607, 207, 4780, 26607, 207, 25],
+      decoded: "\u2581This \u2581is \u2581a \u2581test \u2581.",
+    },
+    NUMBERS_SPLIT: {
+      text: FALCON_TEST_STRINGS.NUMBERS_SPLIT,
+      tokens: ["12", "\u0120and", "\u0120", "123", "\u0120and", "\u0120", "123", "4"],
+      ids: [928, 273, 204, 10963, 273, 204, 10963, 31],
+      decoded: "12 and 123 and 1234",
+    },
+  },
+  "tiiuae/falcon-rw-1b": {
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120should", "'ve", "\u0120done", "\u0120this"],
+      ids: [1639, 815, 1053, 1760, 428],
+      decoded: "You should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["01", "23", "45", "67", "89", "\u01200", "\u01201", "\u01202", "\u01203", "\u01204", "\u01205", "\u01206", "\u01207", "\u01208", "\u01209", "\u012010", "\u0120100", "\u01201000"],
+      ids: [486, 1954, 2231, 3134, 4531, 657, 352, 362, 513, 604, 642, 718, 767, 807, 860, 838, 1802, 8576],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u01202016", "."],
+      ids: [464, 1664, 373, 9393, 287, 1584, 13],
+      decoded: "The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [32, 198, 1183, 37867, 1462, 8348, 67, 7061, 67, 286, 11, 460, 470, 13],
+      decoded: "A\n'll!!to?'d''d of, can't.",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".", "to", "String", "();", "\u010a", "to", "String", "();"],
+      ids: [1616, 257, 796, 26181, 13, 1462, 10100, 9783, 198, 1462, 10100, 9783],
+      decoded: "let a = obj.toString();\ntoString();",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9", "d", ",", "running"],
+      ids: [4944, 42949, 2634, 67, 11, 20270],
+      decoded: "UNwant\u00e9d,running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0100", "2", "\u00ef\u00bf\u00bd", "3"],
+      ids: [16, 188, 17, 4210, 18],
+      decoded: "1\u00002\ufffd3",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141", "\u00e6", "\u00b4", "\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e", "\u0141", "\u00e8", "\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [37955, 162, 112, 119, 21410, 40367, 253, 164, 108, 249, 42468],
+      decoded: "\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120", "\u0120", "\u0120leading", "\u0120space"],
+      ids: [220, 220, 3756, 2272],
+      decoded: "   leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "iling", "\u0120space", "\u0120", "\u0120", "\u0120"],
+      ids: [9535, 4386, 2272, 220, 220, 220],
+      decoded: "trailing space   ",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2", "\u0124", "\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [9288, 720, 16, 371, 17, 1303, 18, 10432, 19, 4248, 20, 38221, 21, 2343, 224, 96, 22, 2343, 224, 117, 23, 2343, 224, 109, 24, 1332],
+      decoded: "test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "00", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [40, 5839, 281, 17180, 329, 720, 16, 13, 405, 379, 262, 3650, 13],
+      decoded: "I bought an apple for $1.00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120", "\u0120"],
+      ids: [5832, 1399, 220, 220],
+      decoded: "you\u2026  ",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120edge", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120case"],
+      ids: [732, 1447, 27332, 121, 252, 5743, 27332, 121, 252, 1339],
+      decoded: "weird \uff5e edge \uff5e case",
+    },
+    NUMBERS_SPLIT: {
+      text: FALCON_TEST_STRINGS.NUMBERS_SPLIT,
+      tokens: ["12", "\u0120and", "\u0120123", "\u0120and", "\u012012", "34"],
+      ids: [1065, 290, 17031, 290, 1105, 2682],
+      decoded: "12 and 123 and 1234",
+    },
+  },
+};
diff --git a/tests/models/gemma/tokenization.js b/tests/models/gemma/tokenization.js
new file mode 100644
index 000000000..2e6c4b3c0
--- /dev/null
+++ b/tests/models/gemma/tokenization.js
@@ -0,0 +1,231 @@
+import { GemmaTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, LLAMA_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = GemmaTokenizer;
+export const TEST_CONFIG = {
+  // Xenova/gemma-tokenizer
+  "Xenova/gemma2-tokenizer": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u2581are", "\u2581you", "\u2581doing", "?"],
+      ids: [2, 2299, 708, 692, 3900, 235336],
+      decoded: "<bos>How are you doing?",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u2581should", "'", "ve", "\u2581done", "\u2581this"],
+      ids: [2, 2045, 1412, 235303, 524, 3015, 736],
+      decoded: "<bos>You should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "\u2581", "0", "\u2581", "1", "\u2581", "2", "\u2581", "3", "\u2581", "4", "\u2581", "5", "\u2581", "6", "\u2581", "7", "\u2581", "8", "\u2581", "9", "\u2581", "1", "0", "\u2581", "1", "0", "0", "\u2581", "1", "0", "0", "0"],
+      ids: [2, 235276, 235274, 235284, 235304, 235310, 235308, 235318, 235324, 235321, 235315, 235248, 235276, 235248, 235274, 235248, 235284, 235248, 235304, 235248, 235310, 235248, 235308, 235248, 235318, 235248, 235324, 235248, 235321, 235248, 235315, 235248, 235274, 235276, 235248, 235274, 235276, 235276, 235248, 235274, 235276, 235276, 235276],
+      decoded: "<bos>0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u2581company", "\u2581was", "\u2581founded", "\u2581in", "\u2581", "2", "0", "1", "6", "."],
+      ids: [2, 651, 3277, 729, 18942, 575, 235248, 235284, 235276, 235274, 235318, 235265],
+      decoded: "<bos>The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\n", "'", "ll", "\u2581!!", "to", "?'", "d", "''", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [2, 235280, 108, 235303, 529, 9063, 511, 18016, 235258, 3404, 235258, 576, 235269, 798, 235303, 235251, 235265],
+      decoded: "<bos>A\n'll !!to?'d''d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u2581main", "():", "\n", "\t", "pass"],
+      ids: [2, 1293, 1872, 4409, 108, 226, 3095],
+      decoded: "<bos>def main():\n\tpass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u2581a", "\u2581=", "\u2581obj", ".", "toString", "();", "\n", "toString", "();"],
+      ids: [2, 1243, 476, 589, 6555, 235265, 7114, 821, 108, 7114, 821],
+      decoded: "<bos>let a = obj.toString();\ntoString();",
+    },
+    NEWLINES: {
+      text: LLAMA_TEST_STRINGS.NEWLINES,
+      tokens: ["ax", "\n", "####", "\n", "boo"],
+      ids: [2, 1247, 108, 3308, 108, 31931],
+      decoded: "<bos>ax\n####\nboo",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00e9d", ",", "running"],
+      ids: [2, 2019, 29007, 45346, 235269, 23655],
+      decoded: "<bos>UNwant\u00e9d,running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "<0x00>", "2", "\ufffd", "3"],
+      ids: [2, 235274, 217, 235284, 236193, 235304],
+      decoded: "<bos>1\u00002\ufffd3",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "\u2581World"],
+      ids: [2, 4521, 3855],
+      decoded: "<bos>Hello World",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "\u2581world"],
+      ids: [2, 17534, 2134],
+      decoded: "<bos>hello world",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f\u6d3b\u7684", "\u771f", "\u8c1b", "\u662f"],
+      ids: [2, 122182, 235710, 245467, 235427],
+      decoded: "<bos>\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581\u2581\u2581", "leading", "\u2581space"],
+      ids: [2, 140, 26650, 3641],
+      decoded: "<bos>   leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trailing", "\u2581space", "\u2581\u2581\u2581"],
+      ids: [2, 100504, 3641, 140],
+      decoded: "<bos>trailing space   ",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u2581\u2581", "Hello"],
+      ids: [2, 2151, 139, 4521],
+      decoded: "<bos>Hi  Hello",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u2581$", "1", "\u2581R", "2", "\u2581#", "3", "\u2581\u20ac", "4", "\u2581\u00a3", "5", "\u2581\u00a5", "6", "\u2581", "\u20a3", "7", "\u2581\u20b9", "8", "\u2581", "\u20b1", "9", "\u2581test"],
+      ids: [2, 2195, 697, 235274, 625, 235284, 1700, 235304, 8296, 235310, 5955, 235308, 74393, 235318, 235248, 252058, 235324, 56712, 235321, 235248, 243132, 235315, 2121],
+      decoded: "<bos>test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u2581bought", "\u2581an", "\u2581apple", "\u2581for", "\u2581$", "1", ".", "0", "0", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [2, 235285, 8989, 671, 15491, 604, 697, 235274, 235265, 235276, 235276, 696, 573, 4659, 235265],
+      decoded: "<bos>I bought an apple for $1.00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u2026", "\u2581\u2581"],
+      ids: [2, 4747, 235417, 139],
+      decoded: "<bos>you\u2026  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u2026", "\u00a0\u00a0"],
+      ids: [2, 4747, 235417, 25445],
+      decoded: "<bos>you\u2026\u00a0\u00a0",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u2026", "\u00a0\u00a0", "you", "\u2026", "\u00a0\u00a0"],
+      ids: [2, 4747, 235417, 25445, 4747, 235417, 25445],
+      decoded: "<bos>you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["weird", "\u2581\uff5e", "\u2581edge", "\u2581\uff5e", "\u2581case"],
+      ids: [2, 102422, 134012, 8541, 134012, 2270],
+      decoded: "<bos>weird \uff5e edge \uff5e case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581This", "\u2581\u2581", "is", "\u2581\u2581", "a", "\u2581\u2581", "test", "\u2581\u2581", "."],
+      ids: [2, 1417, 139, 502, 139, 235250, 139, 2195, 139, 235265],
+      decoded: "<bos> This  is  a  test  .",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\ud83d\ude02", "\u2581\ud83d\udc4d", "\u2581\ud83e\udd23", "\u2581\ud83d\ude0d", "\u2581\ud83d\ude2d", "\u2581\ud83c\udf89", "\u2581\ud83d\ude4f", "\u2581\ud83d\ude0a", "\u2581\ud83d\udd25", "\u2581\ud83d\ude01", "\u2581\ud83d\ude05", "\u2581\ud83e\udd17", "\u2581\ud83d\ude06", "\u2581\ud83d\udc4f", "\u2581\u2764\ufe0f", "\u2581\ud83d\udc9c", "\u2581\ud83d\udc9a", "\u2581\ud83d\udc97", "\u2581\ud83d\udc99", "\u2581\ud83d\udda4", "\u2581\ud83d\ude0e", "\u2581\ud83d\udc4c", "\u2581\ud83e\udd73", "\u2581\ud83d\udcaa", "\u2581\u2728", "\u2581\ud83d\udc49", "\u2581\ud83d\udc40", "\u2581\ud83d\udcaf", "\u2581", "\ud83c\udf88", "\u2581", "\ud83d\ude48", "\u2581\ud83d\ude4c", "\u2581\ud83d\udc80", "\u2581\ud83d\udc47", "\u2581\ud83d\udc4b", "\u2581\u2705", "\u2581", "\ud83c\udf81", "\u2581", "\ud83c\udf1e", "\u2581\ud83c\udf38", "\u2581", "\ud83d\udcb0"],
+      ids: [2, 236471, 38104, 55937, 46434, 55605, 160588, 68226, 44416, 72373, 70636, 75298, 156808, 120433, 104492, 35373, 131674, 191384, 204903, 146773, 166620, 87949, 83860, 211978, 142816, 64726, 166368, 108892, 174882, 235248, 242431, 235248, 241259, 134540, 106918, 154601, 169692, 92641, 235248, 241227, 235248, 241971, 233958, 235248, 241034],
+      decoded: "<bos>\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2728", "\u2581\ud83e\udd17", "\u2581", "\ud83d\udc41", "\ufe0f", "\u2581", "\ud83d\udc71", "\ud83c\udffb", "\u2581", "\ud83d\udd75", "\u200d\u2642\ufe0f", "\u2581", "\ud83e\uddd9", "\ud83c\udffb", "\u200d\u2642", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u200d", "\ud83c\udf3e", "\u2581", "\ud83e\uddd1", "\u200d", "\ud83e\udd1d", "\u200d", "\ud83e\uddd1", "\u2581", "\ud83d\udc69", "\u200d", "\u2764", "\u200d", "\ud83d\udc8b", "\u200d", "\ud83d\udc68", "\u2581", "\ud83d\udc69", "\u200d", "\ud83d\udc69", "\u200d", "\ud83d\udc67", "\u200d", "\ud83d\udc66", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u200d", "\ud83e\udd1d", "\u200d", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83c\udff4", "\udb40\udc67", "\udb40\udc62", "\udb40\udc65", "\udb40\udc6e", "\udb40\udc67", "\udb40\udc7f", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u200d", "\u2764\ufe0f", "\u200d", "\ud83d\udc8b", "\u200d", "\ud83d\udc68", "\ud83c\udffc"],
+      ids: [2, 236309, 156808, 235248, 241666, 235969, 235248, 247216, 237933, 235248, 246522, 68399, 235248, 246422, 237933, 63233, 235248, 241568, 237933, 235879, 244448, 235248, 243634, 235879, 241668, 235879, 243634, 235248, 241355, 235879, 236457, 235879, 240887, 235879, 241568, 235248, 241355, 235879, 241355, 235879, 244355, 235879, 244670, 235248, 243634, 237933, 235879, 241668, 235879, 243634, 237933, 235248, 244443, 246738, 247704, 250142, 250123, 246738, 247662, 235248, 241568, 237933, 235879, 16176, 235879, 240887, 235879, 241568, 238683],
+      decoded: "<bos>\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc",
+    },
+    BPE_SCORES_PRIORITY_1: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_1,
+      tokens: ["grab", "bed"],
+      ids: [2, 59031, 2907],
+      decoded: "<bos>grabbed",
+    },
+    BPE_SCORES_PRIORITY_2: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_2,
+      tokens: ["\u2581grabbed"],
+      ids: [2, 41939],
+      decoded: "<bos> grabbed",
+    },
+    BPE_SCORES_PRIORITY_3: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_3,
+      tokens: ["\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581", "grab", "bed"],
+      ids: [2, 148, 59031, 2907],
+      decoded: "<bos>           grabbed",
+    },
+    NEWLINE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE,
+      tokens: ["\n"],
+      ids: [2, 108],
+      decoded: "<bos>\n",
+    },
+    NEWLINE_WITH_LEADING_SPACE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_WITH_LEADING_SPACE,
+      tokens: ["\u2581", "\n"],
+      ids: [2, 235248, 108],
+      decoded: "<bos> \n",
+    },
+    TABS: {
+      text: LLAMA_TEST_STRINGS.TABS,
+      tokens: ["\t", "tabs", "\t\t\t\t", "out", "\u2581here"],
+      ids: [2, 226, 31973, 255971, 745, 1517],
+      decoded: "<bos>\ttabs\t\t\t\tout here",
+    },
+    NEWLINE_AND_TAB: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_AND_TAB,
+      tokens: ["\n", "\t", "\n"],
+      ids: [2, 108, 226, 108],
+      decoded: "<bos>\n\t\n",
+    },
+    CHINESE_LETTER: {
+      text: LLAMA_TEST_STRINGS.CHINESE_LETTER,
+      tokens: ["\u9547"],
+      ids: [2, 237796],
+      decoded: "<bos>\u9547",
+    },
+    EMOJIS_1: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_1,
+      tokens: ["\ud83e\udd99"],
+      ids: [2, 250645],
+      decoded: "<bos>\ud83e\udd99",
+    },
+    EMOJIS_2: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_2,
+      tokens: ["\ud83e\udd99", "<0xEA>", "<0x99>", "<0x8A>"],
+      ids: [2, 250645, 451, 370, 355],
+      decoded: "<bos>\ud83e\udd99\ua64a",
+    },
+    EMOJIS_3: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_3,
+      tokens: ["<0xEA>", "<0x99>", "<0x8A>", "\ud83e\udd99"],
+      ids: [2, 451, 370, 355, 250645],
+      decoded: "<bos>\ua64a\ud83e\udd99",
+    },
+    PARAGRAPH: {
+      text: LLAMA_TEST_STRINGS.PARAGRAPH,
+      tokens: ["The", "\u2581llama", "\u2581(/", "\u02c8", "l", "\u0251", "\u02d0", "m\u0259", "/;", "\u2581", "\ud83e\udd99", "Spanish", "\u2581pronunciation", ":", "\u2581[", "\u02c8", "\u028e", "ama", "])", "\u2581(", "Lama", "\u2581g", "lama", ")", "\u2581is", "\u2581a", "\u2581domesticated", "\u2581South", "\u2581American", "\u2581came", "lid", ",", "\u2581widely", "\u2581used", "\u2581as", "\u2581a", "\u2581meat", "\u2581and", "\u2581pack", "\u2581animal", "\u2581by", "\u2581Andean", "\u2581cultures", "\u2581since", "\u2581the", "\u2581Pre", "-", "Columb", "ian", "\u2581era", ".", "\u2581Lla", "mas", "\u2581are", "\u2581social", "\u2581animals", "\u2581and", "\u2581live", "\u2581with", "\u2581others", "\u2581as", "\u2581a", "\u2581herd", ".", "\u2581Their", "\u2581wool", "\u2581is", "\u2581soft", "\u2581and", "\u2581contains", "\u2581only", "\u2581a", "\u2581small", "\u2581amount", "\u2581of", "\u2581lan", "olin", ".[", "2", "]", "\u2581Lla", "mas", "\u2581can", "\u2581learn", "\u2581simple", "\u2581tasks", "\u2581after", "\u2581a", "\u2581few", "\u2581repetitions", ".", "\u2581When", "\u2581using", "\u2581a", "\u2581pack", ",", "\u2581they", "\u2581can", "\u2581carry", "\u2581about", "\u2581", "2", "5", "\u2581to", "\u2581", "3", "0", "%", "\u2581of", "\u2581their", "\u2581body", "\u2581weight", "\u2581for", "\u2581", "8", "\u2581to", "\u2581", "1", "3", "\u2581km", "\u2581(", "5", "\u2013", "8", "\u2581miles", ").[", "3", "]", "\u2581The", "\u2581name", "\u2581llama", "\u2581(", "in", "\u2581the", "\u2581past", "\u2581also", "\u2581spelled", '\u2581"', "lama", '"', "\u2581or", '\u2581"', "g", "lama", '")', "\u2581was", "\u2581adopted", "\u2581by", "\u2581European", "\u2581settlers", "\u2581from", "\u2581native", "\u2581Peru", "vi", "ans", ".[", "4", "]", "\u2581The", "\u2581ancestors", "\u2581of", "\u2581llamas", "\u2581are", "\u2581thought", "\u2581to", "\u2581have", "\u2581originated", "\u2581from", "\u2581the", "\u2581Great", "\u2581Plains", "\u2581of", "\u2581North", "\u2581America", "\u2581about", "\u2581", "4", "0", "\u2581million", "\u2581years", "\u2581ago", ",", "\u2581and", "\u2581subsequently", "\u2581migrated", "\u2581to", "\u2581South", "\u2581America", "\u2581about", "\u2581three", "\u2581million", "\u2581years", "\u2581ago", "\u2581during", "\u2581the", "\u2581Great", "\u2581American", "\u2581Interchange", ".", "\u2581By", "\u2581the", "\u2581end", "\u2581of", "\u2581the", "\u2581last", "\u2581ice", "\u2581age", "\u2581(", "1", "0", ",", "0", "0", "0", "\u2013", "1", "2", ",", "0", "0", "0", "\u2581years", "\u2581ago", "),", "\u2581came", "lids", "\u2581were", "\u2581extinct", "\u2581in", "\u2581North", "\u2581America", ".[", "3", "]", "\u2581As", "\u2581of", "\u2581", "2", "0", "0", "7", ",", "\u2581there", "\u2581were", "\u2581over", "\u2581seven", "\u2581million", "\u2581llamas", "\u2581and", "\u2581al", "pac", "as", "\u2581in", "\u2581South", "\u2581America", "\u2581and", "\u2581over", "\u2581", "1", "5", "8", ",", "0", "0", "0", "\u2581llamas", "\u2581and", "\u2581", "1", "0", "0", ",", "0", "0", "0", "<0xEA>", "<0x99>", "<0x8A>", "\ud83e\udd99", "\u2581al", "pac", "as", ",", "\u2581descended", "\u2581from", "\u2581progen", "itors", "\u2581imported", "\u2581late", "\u2581in", "\u2581the", "\u2581", "2", "0", "th", "\u2581century", ",", "\u2581in", "\u2581the", "\u2581United", "\u2581States", "\u2581and", "\u2581Canada", ".[", "5", "]", "\u2581In", "\u2581A", "ym", "ara", "\u2581mythology", ",", "\u2581llamas", "\u2581are", "\u2581important", "\u2581beings", ".", "\u2581The", "\u2581Heavenly", "\u2581Llama", "\u2581is", "\u2581said", "\u2581to", "\u2581drink", "\u2581water", "\u2581from", "\u2581the", "\u2581ocean", "\u2581and", "\u2581urin", "ates", "\u2581as", "\u2581it", "\u2581rains", ".[", "6", "]", "\u2581According", "\u2581to", "\u2581A", "ym", "ara", "\u2581es", "ch", "atology", ",", "\u2581llamas", "\u2581will", "\u2581return", "\u2581to", "\u2581the", "\u2581water", "\u2581springs", "\u2581and", "\u2581lagoons", "\u2581where", "\u2581they", "\u2581come", "\u2581from", "\u2581at", "\u2581the", "\u2581end", "\u2581of", "\u2581time", ".[", "6", "]"],
+      ids: [2, 651, 19001, 101949, 239229, 235257, 240527, 240342, 128631, 102430, 235248, 250645, 51590, 74569, 235292, 892, 239229, 246752, 2867, 3013, 591, 221520, 583, 10450, 235275, 603, 476, 183304, 4316, 3725, 3392, 3353, 235269, 16714, 1671, 685, 476, 11827, 578, 3386, 8205, 731, 207552, 24541, 2754, 573, 2769, 235290, 222963, 1282, 6063, 235265, 172809, 2616, 708, 3127, 8398, 578, 3685, 675, 3588, 685, 476, 48010, 235265, 10368, 23834, 603, 4072, 578, 7744, 1297, 476, 2301, 3619, 576, 7607, 28424, 19047, 235284, 235307, 172809, 2616, 798, 3918, 3890, 13333, 1452, 476, 2619, 126286, 235265, 3194, 2177, 476, 3386, 235269, 984, 798, 6383, 1105, 235248, 235284, 235308, 577, 235248, 235304, 235276, 235358, 576, 1024, 2971, 5171, 604, 235248, 235321, 577, 235248, 235274, 235304, 5821, 591, 235308, 235389, 235321, 7112, 232524, 235304, 235307, 714, 1503, 19001, 591, 473, 573, 3433, 1170, 73003, 664, 10450, 235281, 689, 664, 235264, 10450, 1388, 729, 13861, 731, 7737, 57710, 774, 11634, 30160, 893, 779, 19047, 235310, 235307, 714, 44106, 576, 129953, 708, 3421, 577, 791, 52102, 774, 573, 6553, 55118, 576, 4612, 5783, 1105, 235248, 235310, 235276, 4416, 1658, 3958, 235269, 578, 27956, 106398, 577, 4316, 5783, 1105, 2149, 4416, 1658, 3958, 2290, 573, 6553, 3725, 193879, 235265, 3339, 573, 1580, 576, 573, 2001, 8357, 3911, 591, 235274, 235276, 235269, 235276, 235276, 235276, 235389, 235274, 235284, 235269, 235276, 235276, 235276, 1658, 3958, 823, 3392, 41253, 1049, 78561, 575, 4612, 5783, 19047, 235304, 235307, 1877, 576, 235248, 235284, 235276, 235276, 235324, 235269, 1104, 1049, 1163, 6861, 4416, 129953, 578, 717, 23337, 508, 575, 4316, 5783, 578, 1163, 235248, 235274, 235308, 235321, 235269, 235276, 235276, 235276, 129953, 578, 235248, 235274, 235276, 235276, 235269, 235276, 235276, 235276, 451, 370, 355, 250645, 717, 23337, 508, 235269, 64700, 774, 66279, 15517, 29271, 5245, 575, 573, 235248, 235284, 235276, 489, 7861, 235269, 575, 573, 3520, 3858, 578, 6591, 19047, 235308, 235307, 878, 586, 3985, 1610, 76701, 235269, 129953, 708, 2845, 27290, 235265, 714, 89830, 170669, 603, 1180, 577, 7182, 2003, 774, 573, 13940, 578, 111204, 1204, 685, 665, 50852, 19047, 235318, 235307, 11926, 577, 586, 3985, 1610, 875, 530, 92764, 235269, 129953, 877, 2203, 577, 573, 2003, 31104, 578, 221493, 1570, 984, 2063, 774, 696, 573, 1580, 576, 1069, 19047, 235318, 235307],
+      decoded: '<bos>The llama (/\u02c8l\u0251\u02d0m\u0259/; \ud83e\udd99Spanish pronunciation: [\u02c8\u028eama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era. Llamas are social animals and live with others as a herd. Their wool is soft and contains only a small amount of lanolin.[2] Llamas can learn simple tasks after a few repetitions. When using a pack, they can carry about 25 to 30% of their body weight for 8 to 13 km (5\u20138 miles).[3] The name llama (in the past also spelled "lama" or "glama") was adopted by European settlers from native Peruvians.[4] The ancestors of llamas are thought to have originated from the Great Plains of North America about 40 million years ago, and subsequently migrated to South America about three million years ago during the Great American Interchange. By the end of the last ice age (10,000\u201312,000 years ago), camelids were extinct in North America.[3] As of 2007, there were over seven million llamas and alpacas in South America and over 158,000 llamas and 100,000\ua64a\ud83e\udd99 alpacas, descended from progenitors imported late in the 20th century, in the United States and Canada.[5] In Aymara mythology, llamas are important beings. The Heavenly Llama is said to drink water from the ocean and urinates as it rains.[6] According to Aymara eschatology, llamas will return to the water springs and lagoons where they come from at the end of time.[6]',
+    },
+  },
+};
diff --git a/tests/models/gpt2/tokenization.js b/tests/models/gpt2/tokenization.js
new file mode 100644
index 000000000..c573e7b5c
--- /dev/null
+++ b/tests/models/gpt2/tokenization.js
@@ -0,0 +1,462 @@
+import { GPT2Tokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, SENTENCEPIECE_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = GPT2Tokenizer;
+export const TEST_CONFIG = {
+  // - clean_up_tokenization_spaces=true
+  // - default pretokenization regex
+  "Xenova/gpt2": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u0120are", "\u0120you", "\u0120doing", "?"],
+      ids: [2437, 389, 345, 1804, 30],
+      decoded: "How are you doing?",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120should", "'ve", "\u0120done", "\u0120this"],
+      ids: [1639, 815, 1053, 1760, 428],
+      decoded: "You should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["01", "23", "45", "67", "89", "Ġ0", "Ġ1", "Ġ2", "Ġ3", "Ġ4", "Ġ5", "Ġ6", "Ġ7", "Ġ8", "Ġ9", "Ġ10", "Ġ100", "Ġ1000"],
+      ids: [486, 1954, 2231, 3134, 4531, 657, 352, 362, 513, 604, 642, 718, 767, 807, 860, 838, 1802, 8576],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u01202016", "."],
+      ids: [464, 1664, 373, 9393, 287, 1584, 13],
+      decoded: "The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [32, 198, 1183, 37867, 1462, 8348, 67, 7061, 67, 286, 11, 460, 470, 13],
+      decoded: "A\n'll!!to?'d''d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "():", "\u010a", "\u0109", "pass"],
+      ids: [4299, 1388, 33529, 198, 197, 6603],
+      decoded: "def main():\n\tpass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".", "to", "String", "();", "\u010a", "to", "String", "();"],
+      ids: [1616, 257, 796, 26181, 13, 1462, 10100, 9783, 198, 1462, 10100, 9783],
+      decoded: "let a = obj.toString();\ntoString();",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["This", "\u010a", "\u010a", "is", "\u010a", "a", "\u010a", "test", "."],
+      ids: [1212, 198, 198, 271, 198, 64, 198, 9288, 13],
+      decoded: "This\n\nis\na\ntest.",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9", "d", ",", "running"],
+      ids: [4944, 42949, 2634, 67, 11, 20270],
+      decoded: "UNwant\u00e9d,running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0100", "2", "\u00ef\u00bf\u00bd", "3"],
+      ids: [16, 188, 17, 4210, 18],
+      decoded: "1\u00002\ufffd3",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "\u0120World"],
+      ids: [15496, 2159],
+      decoded: "Hello World",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "\u0120world"],
+      ids: [31373, 995],
+      decoded: "hello world",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141", "\u00e6", "\u00b4", "\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e", "\u0141", "\u00e8", "\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [37955, 162, 112, 119, 21410, 40367, 253, 164, 108, 249, 42468],
+      decoded: "\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120", "\u0120", "\u0120leading", "\u0120space"],
+      ids: [220, 220, 3756, 2272],
+      decoded: "   leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "iling", "\u0120space", "\u0120", "\u0120", "\u0120"],
+      ids: [9535, 4386, 2272, 220, 220, 220],
+      decoded: "trailing space   ",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u0120", "\u0120Hello"],
+      ids: [17250, 220, 18435],
+      decoded: "Hi  Hello",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2", "\u0124", "\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [9288, 720, 16, 371, 17, 1303, 18, 10432, 19, 4248, 20, 38221, 21, 2343, 224, 96, 22, 2343, 224, 117, 23, 2343, 224, 109, 24, 1332],
+      decoded: "test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "00", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [40, 5839, 281, 17180, 329, 720, 16, 13, 405, 379, 262, 3650, 13],
+      decoded: "I bought an apple for $1.00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120", "\u0120"],
+      ids: [5832, 1399, 220, 220],
+      decoded: "you\u2026  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [5832, 1399, 4603],
+      decoded: "you\u2026\u00a0\u00a0",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142", "\u00c2\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [5832, 1399, 1849, 1849, 5832, 1399, 4603],
+      decoded: "you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120edge", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120case"],
+      ids: [732, 1447, 27332, 121, 252, 5743, 27332, 121, 252, 1339],
+      decoded: "weird \uff5e edge \uff5e case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120\u00e2\u0138", "\u0123", "is", "\u0120\u00e2\u0138", "\u0123", "a", "\u0120\u00e2\u0138", "\u0123", "test", "\u0120\u00e2\u0138", "\u0123", "."],
+      ids: [5008, 223, 1212, 11019, 223, 271, 11019, 223, 64, 11019, 223, 9288, 11019, 223, 13],
+      decoded: "\u2581This \u2581is \u2581a \u2581test \u2581.",
+    },
+    SPECIAL_WITH_TRAILING_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_WITH_TRAILING_WHITESPACE,
+      tokens: ["<", "s", ">", "\u010a"],
+      ids: [27, 82, 29, 198],
+      decoded: "<s>\n",
+    },
+    SPECIAL_SURROUNDED_BY_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_SURROUNDED_BY_WHITESPACE,
+      tokens: ["\u0120</", "s", ">", "\u0120test", "\u0120</", "s", ">", "\u0120"],
+      ids: [7359, 82, 29, 1332, 7359, 82, 29, 220],
+      decoded: " </s> test </s> ",
+    },
+    SPECIAL_NO_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_NO_WHITESPACE,
+      tokens: ["</", "s", ">", "test", "</", "s", ">"],
+      ids: [3556, 82, 29, 9288, 3556, 82, 29],
+      decoded: "</s>test</s>",
+    },
+  },
+  // - clean_up_tokenization_spaces=false
+  // - custom pretokenization regex
+  "Xenova/gpt-4": {
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [32, 198, 3358, 11261, 998, 20837, 67, 4708, 67, 315, 11, 649, 956, 13],
+      decoded: "A\n'll !!to?'d''d of, can't.",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".toString", "();\u010a", "toString", "();"],
+      ids: [1169, 264, 284, 2909, 5180, 545, 6712, 2178],
+      decoded: "let a = obj.toString();\ntoString();",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2\u0124\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [1985, 400, 16, 432, 17, 674, 18, 13281, 19, 7083, 20, 72588, 21, 2928, 224, 96, 22, 90891, 23, 2928, 224, 109, 24, 1296],
+      decoded: "test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120", "\u00ef\u00bd\u0140", "\u0120edge", "\u0120", "\u00ef\u00bd\u0140", "\u0120case"],
+      ids: [906, 2668, 220, 21909, 6964, 220, 21909, 1162],
+      decoded: "weird \uff5e edge \uff5e case",
+    },
+  },
+  "Xenova/gpt-4o": {
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["012", "345", "678", "9", "Ġ", "0", "Ġ", "1", "Ġ", "2", "Ġ", "3", "Ġ", "4", "Ġ", "5", "Ġ", "6", "Ġ", "7", "Ġ", "8", "Ġ", "9", "Ġ", "10", "Ġ", "100", "Ġ", "100", "0"],
+      ids: [19267, 22901, 30833, 24, 220, 15, 220, 16, 220, 17, 220, 18, 220, 19, 220, 20, 220, 21, 220, 22, 220, 23, 220, 24, 220, 702, 220, 1353, 220, 1353, 15],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u0120", "201", "6", "."],
+      ids: [976, 3175, 673, 24303, 306, 220, 667, 21, 13],
+      decoded: "The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can't", "."],
+      ids: [32, 198, 6090, 17131, 935, 48511, 67, 5830, 67, 328, 11, 8535, 13],
+      decoded: "A\n'll !!to?'d''d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "():\u010a", "\u0109pass"],
+      ids: [1314, 2758, 8595, 100653],
+      decoded: "def main():\n\tpass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".to", "String", "();\u010a", "to", "String", "();"],
+      ids: [1347, 261, 314, 4099, 3552, 916, 740, 935, 916, 4177],
+      decoded: "let a = obj.toString();\ntoString();",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["This", "\u010a\u010a", "is", "\u010a", "a", "\u010a", "test", "."],
+      ids: [2500, 279, 276, 198, 64, 198, 3190, 13],
+      decoded: "This\n\nis\na\ntest.",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9d", ",r", "unning"],
+      ids: [2926, 72517, 6383, 33654, 11244],
+      decoded: "UNwant\u00e9d,running",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141\u00e6\u00b4\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [32479, 1616, 7910, 7856, 249, 3221],
+      decoded: "\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120\u0120", "\u0120leading", "\u0120space"],
+      ids: [256, 8117, 4918],
+      decoded: "   leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tr", "ailing", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [371, 24408, 4918, 271],
+      decoded: "trailing space   ",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2\u00a5", "6", "\u0120\u00e2\u0124", "\u00a3", "7", "\u0120\u00e2\u0124\u00b9", "8", "\u0120\u00e2\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [3190, 548, 16, 460, 17, 1069, 18, 7950, 19, 8989, 20, 123814, 21, 59790, 96, 22, 73406, 23, 59790, 109, 24, 1746],
+      decoded: "test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120\u0120"],
+      ids: [13320, 1131, 256],
+      decoded: "you\u2026  ",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120\u00ef\u00bd\u0140", "\u0120edge", "\u0120\u00ef\u00bd\u0140", "\u0120case"],
+      ids: [854, 2716, 105665, 11165, 105665, 1890],
+      decoded: "weird \uff5e edge \uff5e case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120\u00e2\u0138\u0123", "is", "\u0120\u00e2\u0138\u0123", "a", "\u0120\u00e2\u0138\u0123", "test", "\u0120\u00e2\u0138\u0123", "."],
+      ids: [6762, 223, 2500, 39960, 276, 39960, 64, 39960, 3190, 39960, 13],
+      decoded: "\u2581This \u2581is \u2581a \u2581test \u2581.",
+    },
+    SPECIAL_WITH_TRAILING_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_WITH_TRAILING_WHITESPACE,
+      tokens: ["<s", ">\u010a"],
+      ids: [101950, 523],
+      decoded: "<s>\n",
+    },
+  },
+  "Xenova/claude-tokenizer": {
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".", "toString", "();", "\u010a", "toString", "();"],
+      ids: [1785, 269, 284, 2652, 18, 26492, 4370, 203, 26492, 4370],
+      decoded: "let a = obj.toString();\ntoString();",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9d", ",", "running"],
+      ids: [2359, 17571, 37911, 16, 7889],
+      decoded: "UNwant\u00e9d,running",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141", "\u00e6\u00b4\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [14706, 37675, 2471, 56904, 15959, 254, 5977],
+      decoded: "\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trailing", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [40110, 3384, 264],
+      decoded: "trailing space   ",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2", "\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2", "\u0124", "\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [765, 734, 21, 487, 22, 379, 23, 36714, 24, 13206, 25, 2455, 103, 26, 4937, 229, 101, 27, 4937, 229, 122, 28, 4937, 229, 114, 29, 722],
+      decoded: "test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "...", "\u0120\u0120"],
+      ids: [6773, 1174, 261],
+      decoded: "you...  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "...", "\u0120\u0120"],
+      ids: [6773, 1174, 261],
+      decoded: "you...  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "...", "\u0120", "\u0120you", "...", "\u0120\u0120"],
+      ids: [6773, 1174, 225, 583, 1174, 261],
+      decoded: "you...  you...  ",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120~", "\u0120edge", "\u0120~", "\u0120case"],
+      ids: [798, 2650, 6217, 4915, 6217, 1544],
+      decoded: "weird ~ edge ~ case",
+    },
+  },
+  "bigcode/santacoder": {
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "Ġ", "0", "Ġ", "1", "Ġ", "2", "Ġ", "3", "Ġ", "4", "Ġ", "5", "Ġ", "6", "Ġ", "7", "Ġ", "8", "Ġ", "9", "Ġ", "1", "0", "Ġ", "1", "0", "0", "Ġ", "1", "0", "0", "0"],
+      ids: [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 207, 15, 207, 16, 207, 17, 207, 18, 207, 19, 207, 20, 207, 21, 207, 22, 207, 23, 207, 24, 207, 16, 15, 207, 16, 15, 15, 207, 16, 15, 15, 15],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120fo", "unded", "\u0120in", "\u0120", "2", "0", "1", "6", "."],
+      ids: [2111, 10107, 2501, 17436, 7584, 319, 207, 17, 15, 16, 21, 13],
+      decoded: "The company was founded in 2016.",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141", "\u00e6\u00b4\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [8715, 24543, 1825, 34717, 37452, 236, 4343],
+      decoded: "\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bo", "ught", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "0", "0", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [40, 12307, 10310, 743, 29806, 408, 763, 16, 13, 15, 15, 869, 331, 2823, 13],
+      decoded: "I bought an apple for $1.00 at the store.",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120", "\u00ef\u00bd", "\u0140", "\u0120edge", "\u0120", "\u00ef\u00bd", "\u0140", "\u0120case"],
+      ids: [1850, 4427, 207, 29217, 239, 4959, 207, 29217, 239, 1210],
+      decoded: "weird \uff5e edge \uff5e case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120", "\u00e2\u0138", "\u0123", "is", "\u0120", "\u00e2\u0138", "\u0123", "a", "\u0120", "\u00e2\u0138", "\u0123", "test", "\u0120", "\u00e2\u0138", "\u0123", "."],
+      ids: [3718, 210, 3456, 207, 3718, 210, 280, 207, 3718, 210, 64, 207, 3718, 210, 706, 207, 3718, 210, 13],
+      decoded: "\u2581This \u2581is \u2581a \u2581test \u2581.",
+    },
+  },
+  "Xenova/CodeGPT-tokenizer": {
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141", "\u00e6", "\u00b4", "\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e", "\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [25506, 165, 115, 122, 5137, 43415, 256, 20679, 252, 13283],
+      decoded: "\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trailing", "\u0120space", "\u0120", "\u0120", "\u0120"],
+      ids: [15584, 3497, 223, 223, 223],
+      decoded: "trailing space   ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142"],
+      ids: [13953, 29502, 129, 257, 129, 257],
+      decoded: "you\u2026\u00a0\u00a0",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142"],
+      ids: [13953, 29502, 129, 257, 129, 257, 13953, 29502, 129, 257, 129, 257],
+      decoded: "you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
+    },
+  },
+  "huggingface-course/codeparrot-ds": {
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0123456789", "Ġ0", "Ġ1", "Ġ2", "Ġ3", "Ġ4", "Ġ5", "Ġ6", "Ġ7", "Ġ8", "Ġ9", "Ġ10", "Ġ100", "Ġ1000"],
+      ids: [25218, 443, 396, 554, 869, 1163, 1462, 1911, 2624, 2070, 2837, 2009, 3038, 4764],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120fo", "unded", "\u0120in", "\u01202016", "."],
+      ids: [2096, 16502, 1442, 11689, 7865, 253, 8780, 14],
+      decoded: "The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!", "!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [33, 173, 6402, 905, 1, 403, 15227, 68, 589, 68, 311, 12, 796, 1059, 14],
+      decoded: "A\n'll!!to?'d''d of, can't.",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".", "toString", "();", "\u010a", "toString", "();"],
+      ids: [2047, 231, 233, 1300, 14, 30494, 16248, 173, 30494, 16248],
+      decoded: "let a = obj.toString();\ntoString();",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141", "\u00e6\u00b4", "\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e", "\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [20185, 43799, 120, 3994, 37782, 211, 15933, 207, 11130],
+      decoded: "\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trailing", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [17031, 3000, 216],
+      decoded: "trailing space   ",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2", "\u0124\u00ac", "4", "\u0120\u00c2", "\u00a3", "5", "\u0120\u00c2", "\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2", "\u0124", "\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [1824, 3748, 17, 683, 18, 294, 19, 5161, 28898, 20, 23446, 97, 21, 23446, 99, 22, 5161, 182, 97, 23, 5161, 182, 118, 24, 5161, 182, 110, 25, 1737],
+      decoded: "test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bo", "ught", "\u0120an", "\u0120app", "le", "\u0120for", "\u0120$", "1", ".", "00", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [41, 772, 8272, 309, 870, 239, 296, 3748, 17, 14, 543, 815, 256, 2689, 14],
+      decoded: "I bought an apple for $1.00 at the store.",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120", "\u00ef", "\u00bd", "\u0140", "\u0120edge", "\u0120", "\u00ef", "\u00bd", "\u0140", "\u0120case"],
+      ids: [955, 6075, 179, 166, 122, 210, 2703, 179, 166, 122, 210, 1539],
+      decoded: "weird \uff5e edge \uff5e case",
+    },
+  },
+};
diff --git a/tests/models/llama/tokenization.js b/tests/models/llama/tokenization.js
new file mode 100644
index 000000000..dcbbe9770
--- /dev/null
+++ b/tests/models/llama/tokenization.js
@@ -0,0 +1,1296 @@
+import { LlamaTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, LLAMA_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = LlamaTokenizer;
+export const TEST_CONFIG = {
+  "Xenova/llama-tokenizer": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["\u2581How", "\u2581are", "\u2581you", "\u2581doing", "?"],
+      ids: [1, 1128, 526, 366, 2599, 29973],
+      decoded: "<s> How are you doing?",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["\u2581You", "\u2581should", "'", "ve", "\u2581done", "\u2581this"],
+      ids: [1, 887, 881, 29915, 345, 2309, 445],
+      decoded: "<s> You should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["\u2581", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "\u2581", "0", "\u2581", "1", "\u2581", "2", "\u2581", "3", "\u2581", "4", "\u2581", "5", "\u2581", "6", "\u2581", "7", "\u2581", "8", "\u2581", "9", "\u2581", "1", "0", "\u2581", "1", "0", "0", "\u2581", "1", "0", "0", "0"],
+      ids: [1, 29871, 29900, 29896, 29906, 29941, 29946, 29945, 29953, 29955, 29947, 29929, 29871, 29900, 29871, 29896, 29871, 29906, 29871, 29941, 29871, 29946, 29871, 29945, 29871, 29953, 29871, 29955, 29871, 29947, 29871, 29929, 29871, 29896, 29900, 29871, 29896, 29900, 29900, 29871, 29896, 29900, 29900, 29900],
+      decoded: "<s> 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u2581The", "\u2581company", "\u2581was", "\u2581founded", "\u2581in", "\u2581", "2", "0", "1", "6", "."],
+      ids: [1, 450, 5001, 471, 11091, 297, 29871, 29906, 29900, 29896, 29953, 29889],
+      decoded: "<s> The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581A", "<0x0A>", "'", "ll", "\u2581!!", "to", "?'", "d", "''", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [1, 319, 13, 29915, 645, 21443, 517, 17901, 29881, 4907, 29881, 310, 29892, 508, 29915, 29873, 29889],
+      decoded: "<s> A\n'll !!to?'d''d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u2581def", "\u2581main", "():", "<0x0A>", "<0x09>", "pass"],
+      ids: [1, 822, 1667, 7295, 13, 12, 3364],
+      decoded: "<s> def main():\n\tpass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u2581let", "\u2581a", "\u2581=", "\u2581obj", ".", "toString", "();", "<0x0A>", "toString", "();"],
+      ids: [1, 1235, 263, 353, 5446, 29889, 7711, 890, 13, 7711, 890],
+      decoded: "<s> let a = obj.toString();\ntoString();",
+    },
+    NEWLINES: {
+      text: LLAMA_TEST_STRINGS.NEWLINES,
+      tokens: ["\u2581ax", "<0x0A>", "####", "<0x0A>", "bo", "o"],
+      ids: [1, 4853, 13, 4136, 13, 833, 29877],
+      decoded: "<s> ax\n####\nboo",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u2581UN", "w", "ant", "\u00e9d", ",", "running"],
+      ids: [1, 8291, 29893, 424, 2487, 29892, 21094],
+      decoded: "<s> UNwant\u00e9d,running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["\u2581", "1", "<0x00>", "2", "\ufffd", "3"],
+      ids: [1, 29871, 29896, 3, 29906, 30140, 29941],
+      decoded: "<s> 1\u00002\ufffd3",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["\u2581Hello", "\u2581World"],
+      ids: [1, 15043, 2787],
+      decoded: "<s> Hello World",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["\u2581hello", "\u2581world"],
+      ids: [1, 22172, 3186],
+      decoded: "<s> hello world",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581", "\u751f", "\u6d3b", "\u7684", "\u771f", "<0xE8>", "<0xB0>", "<0x9B>", "\u662f"],
+      ids: [1, 29871, 30486, 31704, 30210, 30848, 235, 179, 158, 30392],
+      decoded: "<s> \u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581\u2581\u2581", "\u2581leading", "\u2581space"],
+      ids: [1, 1678, 8236, 2913],
+      decoded: "<s>    leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trailing", "\u2581space", "\u2581\u2581\u2581"],
+      ids: [1, 25053, 2913, 1678],
+      decoded: "<s> trailing space   ",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["\u2581Hi", "\u2581", "\u2581Hello"],
+      ids: [1, 6324, 29871, 15043],
+      decoded: "<s> Hi  Hello",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$", "1", "\u2581R", "2", "\u2581#", "3", "\u2581\u20ac", "4", "\u2581\u00a3", "5", "\u2581", "\u00a5", "6", "\u2581", "<0xE2>", "<0x82>", "<0xA3>", "7", "\u2581", "\u20b9", "8", "\u2581", "<0xE2>", "<0x82>", "<0xB1>", "9", "\u2581test"],
+      ids: [1, 1243, 395, 29896, 390, 29906, 396, 29941, 25540, 29946, 15151, 29945, 29871, 30563, 29953, 29871, 229, 133, 166, 29955, 29871, 30620, 29947, 29871, 229, 133, 180, 29929, 1243],
+      decoded: "<s> test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581I", "\u2581bought", "\u2581an", "\u2581apple", "\u2581for", "\u2581$", "1", ".", "0", "0", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [1, 306, 18093, 385, 26163, 363, 395, 29896, 29889, 29900, 29900, 472, 278, 3787, 29889],
+      decoded: "<s> I bought an apple for $1.00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "\u2026", "\u2581\u2581"],
+      ids: [1, 366, 30098, 259],
+      decoded: "<s> you\u2026  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "\u2026", "\u00a0\u00a0"],
+      ids: [1, 366, 30098, 8655],
+      decoded: "<s> you\u2026\u00a0\u00a0",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "\u2026", "\u00a0\u00a0", "you", "\u2026", "\u00a0\u00a0"],
+      ids: [1, 366, 30098, 8655, 6293, 30098, 8655],
+      decoded: "<s> you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581weird", "\u2581", "\uff5e", "\u2581edge", "\u2581", "\uff5e", "\u2581case"],
+      ids: [1, 13543, 29871, 30739, 7636, 29871, 30739, 1206],
+      decoded: "<s> weird \uff5e edge \uff5e case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581", "\u2581This", "\u2581", "\u2581is", "\u2581", "\u2581a", "\u2581", "\u2581test", "\u2581", "\u2581."],
+      ids: [1, 29871, 910, 29871, 338, 29871, 263, 29871, 1243, 29871, 869],
+      decoded: "<s>  This  is  a  test  .",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x82>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x8D>", "\u2581", "<0xF0>", "<0x9F>", "<0xA4>", "<0xA3>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x8D>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0xAD>", "\u2581", "<0xF0>", "<0x9F>", "<0x8E>", "<0x89>", "\u2581", "<0xF0>", "<0x9F>", "<0x99>", "<0x8F>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x8A>", "\u2581", "<0xF0>", "<0x9F>", "<0x94>", "<0xA5>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x81>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x85>", "\u2581", "<0xF0>", "<0x9F>", "<0xA4>", "<0x97>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x86>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x8F>", "\u2581", "<0xE2>", "<0x9D>", "<0xA4>", "\ufe0f", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0x9C>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0x9A>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0x97>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0x99>", "\u2581", "<0xF0>", "<0x9F>", "<0x96>", "<0xA4>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x8E>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x8C>", "\u2581", "<0xF0>", "<0x9F>", "<0xA5>", "<0xB3>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0xAA>", "\u2581", "<0xE2>", "<0x9C>", "<0xA8>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x89>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x80>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0xAF>", "\u2581", "<0xF0>", "<0x9F>", "<0x8E>", "<0x88>", "\u2581", "<0xF0>", "<0x9F>", "<0x99>", "<0x88>", "\u2581", "<0xF0>", "<0x9F>", "<0x99>", "<0x8C>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0x80>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x87>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x8B>", "\u2581", "\u2705", "\u2581", "<0xF0>", "<0x9F>", "<0x8E>", "<0x81>", "\u2581", "<0xF0>", "<0x9F>", "<0x8C>", "<0x9E>", "\u2581", "<0xF0>", "<0x9F>", "<0x8C>", "<0xB8>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0xB0>"],
+      ids: [1, 29871, 243, 162, 155, 133, 29871, 243, 162, 148, 144, 29871, 243, 162, 167, 166, 29871, 243, 162, 155, 144, 29871, 243, 162, 155, 176, 29871, 243, 162, 145, 140, 29871, 243, 162, 156, 146, 29871, 243, 162, 155, 141, 29871, 243, 162, 151, 168, 29871, 243, 162, 155, 132, 29871, 243, 162, 155, 136, 29871, 243, 162, 167, 154, 29871, 243, 162, 155, 137, 29871, 243, 162, 148, 146, 29871, 229, 160, 167, 30598, 29871, 243, 162, 149, 159, 29871, 243, 162, 149, 157, 29871, 243, 162, 149, 154, 29871, 243, 162, 149, 156, 29871, 243, 162, 153, 167, 29871, 243, 162, 155, 145, 29871, 243, 162, 148, 143, 29871, 243, 162, 168, 182, 29871, 243, 162, 149, 173, 29871, 229, 159, 171, 29871, 243, 162, 148, 140, 29871, 243, 162, 148, 131, 29871, 243, 162, 149, 178, 29871, 243, 162, 145, 139, 29871, 243, 162, 156, 139, 29871, 243, 162, 156, 143, 29871, 243, 162, 149, 131, 29871, 243, 162, 148, 138, 29871, 243, 162, 148, 142, 29871, 31681, 29871, 243, 162, 145, 132, 29871, 243, 162, 143, 161, 29871, 243, 162, 143, 187, 29871, 243, 162, 149, 179],
+      decoded: "<s> \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "<0xE2>", "<0x9C>", "<0xA8>", "\u2581", "<0xF0>", "<0x9F>", "<0xA4>", "<0x97>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x81>", "\ufe0f", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0xB1>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "\u2581", "<0xF0>", "<0x9F>", "<0x95>", "<0xB5>", "\u200d", "\u2642", "\ufe0f", "\u2581", "<0xF0>", "<0x9F>", "<0xA7>", "<0x99>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "\u200d", "\u2642", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0xA8>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "\u200d", "<0xF0>", "<0x9F>", "<0x8C>", "<0xBE>", "\u2581", "<0xF0>", "<0x9F>", "<0xA7>", "<0x91>", "\u200d", "<0xF0>", "<0x9F>", "<0xA4>", "<0x9D>", "\u200d", "<0xF0>", "<0x9F>", "<0xA7>", "<0x91>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0xA9>", "\u200d", "<0xE2>", "<0x9D>", "<0xA4>", "\u200d", "<0xF0>", "<0x9F>", "<0x92>", "<0x8B>", "\u200d", "<0xF0>", "<0x9F>", "<0x91>", "<0xA8>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0xA9>", "\u200d", "<0xF0>", "<0x9F>", "<0x91>", "<0xA9>", "\u200d", "<0xF0>", "<0x9F>", "<0x91>", "<0xA7>", "\u200d", "<0xF0>", "<0x9F>", "<0x91>", "<0xA6>", "\u2581", "<0xF0>", "<0x9F>", "<0xA7>", "<0x91>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "\u200d", "<0xF0>", "<0x9F>", "<0xA4>", "<0x9D>", "\u200d", "<0xF0>", "<0x9F>", "<0xA7>", "<0x91>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "\u2581", "<0xF0>", "<0x9F>", "<0x8F>", "<0xB4>", "<0xF3>", "<0xA0>", "<0x81>", "<0xA7>", "<0xF3>", "<0xA0>", "<0x81>", "<0xA2>", "<0xF3>", "<0xA0>", "<0x81>", "<0xA5>", "<0xF3>", "<0xA0>", "<0x81>", "<0xAE>", "<0xF3>", "<0xA0>", "<0x81>", "<0xA7>", "<0xF3>", "<0xA0>", "<0x81>", "<0xBF>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0xA8>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "\u200d", "<0xE2>", "<0x9D>", "<0xA4>", "\ufe0f", "\u200d", "<0xF0>", "<0x9F>", "<0x92>", "<0x8B>", "\u200d", "<0xF0>", "<0x9F>", "<0x91>", "<0xA8>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBC>"],
+      ids: [1, 29871, 229, 159, 171, 29871, 243, 162, 167, 154, 29871, 243, 162, 148, 132, 30598, 29871, 243, 162, 148, 180, 243, 162, 146, 190, 29871, 243, 162, 152, 184, 30722, 31135, 30598, 29871, 243, 162, 170, 156, 243, 162, 146, 190, 30722, 31135, 29871, 243, 162, 148, 171, 243, 162, 146, 190, 30722, 243, 162, 143, 193, 29871, 243, 162, 170, 148, 30722, 243, 162, 167, 160, 30722, 243, 162, 170, 148, 29871, 243, 162, 148, 172, 30722, 229, 160, 167, 30722, 243, 162, 149, 142, 30722, 243, 162, 148, 171, 29871, 243, 162, 148, 172, 30722, 243, 162, 148, 172, 30722, 243, 162, 148, 170, 30722, 243, 162, 148, 169, 29871, 243, 162, 170, 148, 243, 162, 146, 190, 30722, 243, 162, 167, 160, 30722, 243, 162, 170, 148, 243, 162, 146, 190, 29871, 243, 162, 146, 183, 246, 163, 132, 170, 246, 163, 132, 165, 246, 163, 132, 168, 246, 163, 132, 177, 246, 163, 132, 170, 246, 163, 132, 194, 29871, 243, 162, 148, 171, 243, 162, 146, 190, 30722, 229, 160, 167, 30598, 30722, 243, 162, 149, 142, 30722, 243, 162, 148, 171, 243, 162, 146, 191],
+      decoded: "<s> \u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc",
+    },
+    BPE_SCORES_PRIORITY_1: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_1,
+      tokens: ["\u2581gra", "bb", "ed"],
+      ids: [1, 2646, 1327, 287],
+      decoded: "<s> grabbed",
+    },
+    BPE_SCORES_PRIORITY_2: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_2,
+      tokens: ["\u2581", "\u2581gra", "bb", "ed"],
+      ids: [1, 29871, 2646, 1327, 287],
+      decoded: "<s>  grabbed",
+    },
+    BPE_SCORES_PRIORITY_3: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_3,
+      tokens: ["\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581\u2581", "\u2581gra", "bb", "ed"],
+      ids: [1, 9651, 2646, 1327, 287],
+      decoded: "<s>            grabbed",
+    },
+    NEWLINE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE,
+      tokens: ["\u2581", "<0x0A>"],
+      ids: [1, 29871, 13],
+      decoded: "<s> \n",
+    },
+    NEWLINE_WITH_LEADING_SPACE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_WITH_LEADING_SPACE,
+      tokens: ["\u2581\u2581", "<0x0A>"],
+      ids: [1, 259, 13],
+      decoded: "<s>  \n",
+    },
+    TABS: {
+      text: LLAMA_TEST_STRINGS.TABS,
+      tokens: ["\u2581", "<0x09>", "tabs", "<0x09>", "<0x09>", "<0x09>", "<0x09>", "out", "\u2581here"],
+      ids: [1, 29871, 12, 21175, 12, 12, 12, 12, 449, 1244],
+      decoded: "<s> \ttabs\t\t\t\tout here",
+    },
+    NEWLINE_AND_TAB: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_AND_TAB,
+      tokens: ["\u2581", "<0x0A>", "<0x09>", "<0x0A>"],
+      ids: [1, 29871, 13, 12, 13],
+      decoded: "<s> \n\t\n",
+    },
+    CHINESE_LETTER: {
+      text: LLAMA_TEST_STRINGS.CHINESE_LETTER,
+      tokens: ["\u2581", "\u9547"],
+      ids: [1, 29871, 30411],
+      decoded: "<s> \u9547",
+    },
+    EMOJIS_1: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_1,
+      tokens: ["\u2581", "<0xF0>", "<0x9F>", "<0xA6>", "<0x99>"],
+      ids: [1, 29871, 243, 162, 169, 156],
+      decoded: "<s> \ud83e\udd99",
+    },
+    EMOJIS_2: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_2,
+      tokens: ["\u2581", "<0xF0>", "<0x9F>", "<0xA6>", "<0x99>", "<0xEA>", "<0x99>", "<0x8A>"],
+      ids: [1, 29871, 243, 162, 169, 156, 237, 156, 141],
+      decoded: "<s> \ud83e\udd99\ua64a",
+    },
+    EMOJIS_3: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_3,
+      tokens: ["\u2581", "<0xEA>", "<0x99>", "<0x8A>", "<0xF0>", "<0x9F>", "<0xA6>", "<0x99>"],
+      ids: [1, 29871, 237, 156, 141, 243, 162, 169, 156],
+      decoded: "<s> \ua64a\ud83e\udd99",
+    },
+    PARAGRAPH: {
+      text: LLAMA_TEST_STRINGS.PARAGRAPH,
+      tokens: ["\u2581The", "\u2581ll", "ama", "\u2581(/", "\u02c8", "l", "\u0251", "\u02d0", "m", "\u0259", "/", ";", "\u2581", "<0xF0>", "<0x9F>", "<0xA6>", "<0x99>", "Span", "ish", "\u2581pron", "unci", "ation", ":", "\u2581[", "\u02c8", "\u028e", "ama", "])", "\u2581(", "L", "ama", "\u2581gl", "ama", ")", "\u2581is", "\u2581a", "\u2581domestic", "ated", "\u2581South", "\u2581American", "\u2581cam", "el", "id", ",", "\u2581widely", "\u2581used", "\u2581as", "\u2581a", "\u2581meat", "\u2581and", "\u2581pack", "\u2581animal", "\u2581by", "\u2581And", "e", "an", "\u2581cult", "ures", "\u2581since", "\u2581the", "\u2581Pre", "-", "Col", "umb", "ian", "\u2581era", ".", "\u2581L", "lam", "as", "\u2581are", "\u2581social", "\u2581animals", "\u2581and", "\u2581live", "\u2581with", "\u2581others", "\u2581as", "\u2581a", "\u2581her", "d", ".", "\u2581Their", "\u2581w", "ool", "\u2581is", "\u2581soft", "\u2581and", "\u2581contains", "\u2581only", "\u2581a", "\u2581small", "\u2581amount", "\u2581of", "\u2581lan", "olin", ".[", "2", "]", "\u2581L", "lam", "as", "\u2581can", "\u2581learn", "\u2581simple", "\u2581tasks", "\u2581after", "\u2581a", "\u2581few", "\u2581repet", "itions", ".", "\u2581When", "\u2581using", "\u2581a", "\u2581pack", ",", "\u2581they", "\u2581can", "\u2581carry", "\u2581about", "\u2581", "2", "5", "\u2581to", "\u2581", "3", "0", "%", "\u2581of", "\u2581their", "\u2581body", "\u2581weight", "\u2581for", "\u2581", "8", "\u2581to", "\u2581", "1", "3", "\u2581km", "\u2581(", "5", "\u2013", "8", "\u2581miles", ").", "[", "3", "]", "\u2581The", "\u2581name", "\u2581ll", "ama", "\u2581(", "in", "\u2581the", "\u2581past", "\u2581also", "\u2581sp", "elled", '\u2581"', "l", "ama", '"', "\u2581or", '\u2581"', "gl", "ama", '")', "\u2581was", "\u2581adopted", "\u2581by", "\u2581European", "\u2581sett", "lers", "\u2581from", "\u2581native", "\u2581Peru", "vi", "ans", ".[", "4", "]", "\u2581The", "\u2581ancest", "ors", "\u2581of", "\u2581llam", "as", "\u2581are", "\u2581thought", "\u2581to", "\u2581have", "\u2581origin", "ated", "\u2581from", "\u2581the", "\u2581Great", "\u2581Pla", "ins", "\u2581of", "\u2581North", "\u2581America", "\u2581about", "\u2581", "4", "0", "\u2581million", "\u2581years", "\u2581ago", ",", "\u2581and", "\u2581subsequently", "\u2581migr", "ated", "\u2581to", "\u2581South", "\u2581America", "\u2581about", "\u2581three", "\u2581million", "\u2581years", "\u2581ago", "\u2581during", "\u2581the", "\u2581Great", "\u2581American", "\u2581Inter", "change", ".", "\u2581By", "\u2581the", "\u2581end", "\u2581of", "\u2581the", "\u2581last", "\u2581ice", "\u2581age", "\u2581(", "1", "0", ",", "0", "0", "0", "\u2013", "1", "2", ",", "0", "0", "0", "\u2581years", "\u2581ago", "),", "\u2581cam", "el", "ids", "\u2581were", "\u2581ext", "inct", "\u2581in", "\u2581North", "\u2581America", ".[", "3", "]", "\u2581As", "\u2581of", "\u2581", "2", "0", "0", "7", ",", "\u2581there", "\u2581were", "\u2581over", "\u2581seven", "\u2581million", "\u2581llam", "as", "\u2581and", "\u2581al", "p", "ac", "as", "\u2581in", "\u2581South", "\u2581America", "\u2581and", "\u2581over", "\u2581", "1", "5", "8", ",", "0", "0", "0", "\u2581llam", "as", "\u2581and", "\u2581", "1", "0", "0", ",", "0", "0", "0", "<0xEA>", "<0x99>", "<0x8A>", "<0xF0>", "<0x9F>", "<0xA6>", "<0x99>", "\u2581al", "p", "ac", "as", ",", "\u2581desc", "ended", "\u2581from", "\u2581pro", "gen", "itors", "\u2581imported", "\u2581late", "\u2581in", "\u2581the", "\u2581", "2", "0", "th", "\u2581century", ",", "\u2581in", "\u2581the", "\u2581United", "\u2581States", "\u2581and", "\u2581Canada", ".[", "5", "]", "\u2581In", "\u2581A", "ym", "ara", "\u2581myth", "ology", ",", "\u2581llam", "as", "\u2581are", "\u2581important", "\u2581be", "ings", ".", "\u2581The", "\u2581Heaven", "ly", "\u2581L", "l", "ama", "\u2581is", "\u2581said", "\u2581to", "\u2581drink", "\u2581water", "\u2581from", "\u2581the", "\u2581ocean", "\u2581and", "\u2581ur", "in", "ates", "\u2581as", "\u2581it", "\u2581ra", "ins", ".[", "6", "]", "\u2581According", "\u2581to", "\u2581A", "ym", "ara", "\u2581es", "chat", "ology", ",", "\u2581llam", "as", "\u2581will", "\u2581return", "\u2581to", "\u2581the", "\u2581water", "\u2581spr", "ings", "\u2581and", "\u2581l", "ago", "ons", "\u2581where", "\u2581they", "\u2581come", "\u2581from", "\u2581at", "\u2581the", "\u2581end", "\u2581of", "\u2581time", ".[", "6", "]"],
+      ids: [1, 450, 11148, 3304, 20374, 30176, 29880, 30426, 30215, 29885, 30184, 29914, 29936, 29871, 243, 162, 169, 156, 15495, 728, 11504, 11173, 362, 29901, 518, 30176, 31743, 3304, 2314, 313, 29931, 3304, 3144, 3304, 29897, 338, 263, 21849, 630, 4275, 3082, 3949, 295, 333, 29892, 17644, 1304, 408, 263, 27654, 322, 4870, 13019, 491, 1126, 29872, 273, 4185, 1973, 1951, 278, 4721, 29899, 1625, 3774, 713, 3152, 29889, 365, 5288, 294, 526, 5264, 15006, 322, 5735, 411, 4045, 408, 263, 902, 29881, 29889, 11275, 281, 1507, 338, 4964, 322, 3743, 871, 263, 2319, 5253, 310, 10906, 22878, 7226, 29906, 29962, 365, 5288, 294, 508, 5110, 2560, 9595, 1156, 263, 2846, 21159, 2187, 29889, 1932, 773, 263, 4870, 29892, 896, 508, 8677, 1048, 29871, 29906, 29945, 304, 29871, 29941, 29900, 29995, 310, 1009, 3573, 7688, 363, 29871, 29947, 304, 29871, 29896, 29941, 2383, 313, 29945, 29994, 29947, 7800, 467, 29961, 29941, 29962, 450, 1024, 11148, 3304, 313, 262, 278, 4940, 884, 805, 14356, 376, 29880, 3304, 29908, 470, 376, 3820, 3304, 1159, 471, 16356, 491, 7824, 3604, 9306, 515, 7531, 25493, 1403, 550, 7226, 29946, 29962, 450, 19525, 943, 310, 11829, 294, 526, 2714, 304, 505, 3978, 630, 515, 278, 7027, 13494, 1144, 310, 4644, 6813, 1048, 29871, 29946, 29900, 7284, 2440, 8020, 29892, 322, 17602, 9725, 630, 304, 4275, 6813, 1048, 2211, 7284, 2440, 8020, 2645, 278, 7027, 3082, 4124, 3167, 29889, 2648, 278, 1095, 310, 278, 1833, 14890, 5046, 313, 29896, 29900, 29892, 29900, 29900, 29900, 29994, 29896, 29906, 29892, 29900, 29900, 29900, 2440, 8020, 511, 3949, 295, 4841, 892, 1294, 5562, 297, 4644, 6813, 7226, 29941, 29962, 1094, 310, 29871, 29906, 29900, 29900, 29955, 29892, 727, 892, 975, 9881, 7284, 11829, 294, 322, 394, 29886, 562, 294, 297, 4275, 6813, 322, 975, 29871, 29896, 29945, 29947, 29892, 29900, 29900, 29900, 11829, 294, 322, 29871, 29896, 29900, 29900, 29892, 29900, 29900, 29900, 237, 156, 141, 243, 162, 169, 156, 394, 29886, 562, 294, 29892, 5153, 2760, 515, 410, 1885, 17259, 19673, 5683, 297, 278, 29871, 29906, 29900, 386, 6462, 29892, 297, 278, 3303, 3900, 322, 7400, 7226, 29945, 29962, 512, 319, 962, 2518, 22082, 3002, 29892, 11829, 294, 526, 4100, 367, 886, 29889, 450, 22977, 368, 365, 29880, 3304, 338, 1497, 304, 13748, 4094, 515, 278, 23474, 322, 5065, 262, 1078, 408, 372, 1153, 1144, 7226, 29953, 29962, 7579, 304, 319, 962, 2518, 831, 13496, 3002, 29892, 11829, 294, 674, 736, 304, 278, 4094, 7689, 886, 322, 301, 4425, 787, 988, 896, 2041, 515, 472, 278, 1095, 310, 931, 7226, 29953, 29962],
+      decoded: '<s> The llama (/\u02c8l\u0251\u02d0m\u0259/; \ud83e\udd99Spanish pronunciation: [\u02c8\u028eama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era. Llamas are social animals and live with others as a herd. Their wool is soft and contains only a small amount of lanolin.[2] Llamas can learn simple tasks after a few repetitions. When using a pack, they can carry about 25 to 30% of their body weight for 8 to 13 km (5\u20138 miles).[3] The name llama (in the past also spelled "lama" or "glama") was adopted by European settlers from native Peruvians.[4] The ancestors of llamas are thought to have originated from the Great Plains of North America about 40 million years ago, and subsequently migrated to South America about three million years ago during the Great American Interchange. By the end of the last ice age (10,000\u201312,000 years ago), camelids were extinct in North America.[3] As of 2007, there were over seven million llamas and alpacas in South America and over 158,000 llamas and 100,000\ua64a\ud83e\udd99 alpacas, descended from progenitors imported late in the 20th century, in the United States and Canada.[5] In Aymara mythology, llamas are important beings. The Heavenly Llama is said to drink water from the ocean and urinates as it rains.[6] According to Aymara eschatology, llamas will return to the water springs and lagoons where they come from at the end of time.[6]',
+    },
+  },
+  "Xenova/llama3-tokenizer": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u0120are", "\u0120you", "\u0120doing", "?"],
+      ids: [4438, 527, 499, 3815, 30],
+      decoded: "How are you doing?",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120should", "'ve", "\u0120done", "\u0120this"],
+      ids: [2675, 1288, 3077, 2884, 420],
+      decoded: "You should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["012", "345", "678", "9", "\u0120", "0", "\u0120", "1", "\u0120", "2", "\u0120", "3", "\u0120", "4", "\u0120", "5", "\u0120", "6", "\u0120", "7", "\u0120", "8", "\u0120", "9", "\u0120", "10", "\u0120", "100", "\u0120", "100", "0"],
+      ids: [11531, 12901, 17458, 24, 220, 15, 220, 16, 220, 17, 220, 18, 220, 19, 220, 20, 220, 21, 220, 22, 220, 23, 220, 24, 220, 605, 220, 1041, 220, 1041, 15],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u0120", "201", "6", "."],
+      ids: [791, 2883, 574, 18538, 304, 220, 679, 21, 13],
+      decoded: "The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [32, 198, 3358, 11261, 998, 20837, 67, 4708, 67, 315, 11, 649, 956, 13],
+      decoded: "A\n'll!!to?'d''d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "():\u010a", "\u0109pass"],
+      ids: [755, 1925, 4019, 42531],
+      decoded: "def main():\n\tpass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".toString", "();\u010a", "toString", "();"],
+      ids: [1169, 264, 284, 2909, 5180, 545, 6712, 2178],
+      decoded: "let a = obj.toString();\ntoString();",
+    },
+    NEWLINES: {
+      text: LLAMA_TEST_STRINGS.NEWLINES,
+      tokens: ["ax", "\u010a", "####\u010a", "boo"],
+      ids: [710, 198, 71050, 34093],
+      decoded: "ax\n####\nboo",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9d", ",", "running"],
+      ids: [1899, 53757, 15433, 11, 28272],
+      decoded: "UNwant\u00e9d,running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0100", "2", "\u00ef\u00bf\u00bd", "3"],
+      ids: [16, 188, 17, 5809, 18],
+      decoded: "1\u00002\ufffd3",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "\u0120World"],
+      ids: [9906, 4435],
+      decoded: "Hello World",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "\u0120world"],
+      ids: [15339, 1917],
+      decoded: "hello world",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141\u00e6\u00b4\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [104654, 9554, 89151, 39013, 249, 21043],
+      decoded: "\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120\u0120", "\u0120leading", "\u0120space"],
+      ids: [256, 6522, 3634],
+      decoded: "   leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tr", "ailing", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [376, 14612, 3634, 262],
+      decoded: "trailing space   ",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u0120", "\u0120Hello"],
+      ids: [13347, 220, 22691],
+      decoded: "Hi  Hello",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2\u00a5", "6", "\u0120\u00e2\u0124", "\u00a3", "7", "\u0120\u00e2\u0124\u00b9", "8", "\u0120\u00e2\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [1985, 400, 16, 432, 17, 674, 18, 13281, 19, 7083, 20, 72588, 21, 113384, 96, 22, 90891, 23, 113384, 109, 24, 1296],
+      decoded: "test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "00", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [40, 11021, 459, 24149, 369, 400, 16, 13, 410, 520, 279, 3637, 13],
+      decoded: "I bought an apple for $1.00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120\u0120"],
+      ids: [9514, 1981, 256],
+      decoded: "you\u2026  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [9514, 1981, 9421],
+      decoded: "you\u2026\u00a0\u00a0",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142", "\u00c2\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [9514, 1981, 4194, 4194, 9514, 1981, 9421],
+      decoded: "you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120\u00ef\u00bd\u0140", "\u0120edge", "\u0120\u00ef\u00bd\u0140", "\u0120case"],
+      ids: [906, 2668, 111942, 6964, 111942, 1162],
+      decoded: "weird \uff5e edge \uff5e case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120\u00e2\u0138", "\u0123", "is", "\u0120\u00e2\u0138", "\u0123", "a", "\u0120\u00e2\u0138", "\u0123", "test", "\u0120\u00e2\u0138", "\u0123", "."],
+      ids: [10634, 223, 2028, 14860, 223, 285, 14860, 223, 64, 14860, 223, 1985, 14860, 223, 13],
+      decoded: "\u2581This \u2581is \u2581a \u2581test \u2581.",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u00f0\u0141\u013a", "\u0124", "\u0120\u00f0\u0141\u0133", "\u012f", "\u0120\u00f0\u0141", "\u00a4", "\u00a3", "\u0120\u00f0\u0141\u013a", "\u012f", "\u0120\u00f0\u0141\u013a", "\u0143", "\u0120\u00f0\u0141", "\u0130", "\u012b", "\u0120\u00f0\u0141", "\u013b", "\u0131", "\u0120\u00f0\u0141\u013a", "\u012c", "\u0120\u00f0\u0141\u0136", "\u00a5", "\u0120\u00f0\u0141\u013a", "\u0123", "\u0120\u00f0\u0141\u013a", "\u0127", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u013a", "\u0128", "\u0120\u00f0\u0141\u0133", "\u0131", "\u0120\u00e2\u013f\u00a4", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141\u0134", "\u013e", "\u0120\u00f0\u0141\u0134", "\u013c", "\u0120\u00f0\u0141\u0134", "\u0139", "\u0120\u00f0\u0141\u0134", "\u013b", "\u0120\u00f0\u0141", "\u0138", "\u00a4", "\u0120\u00f0\u0141\u013a", "\u0130", "\u0120\u00f0\u0141\u0133", "\u012e", "\u0120\u00f0\u0141", "\u00a5", "\u00b3", "\u0120\u00f0\u0141\u0134", "\u00aa", "\u0120\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141\u0133", "\u012b", "\u0120\u00f0\u0141\u0133", "\u0122", "\u0120\u00f0\u0141\u0134", "\u00af", "\u0120\u00f0\u0141", "\u0130", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012e", "\u0120\u00f0\u0141\u0134", "\u0122", "\u0120\u00f0\u0141\u0133", "\u0129", "\u0120\u00f0\u0141\u0133", "\u012d", "\u0120\u00e2\u013e", "\u0127", "\u0120\u00f0\u0141", "\u0130", "\u0123", "\u0120\u00f0\u0141", "\u012e", "\u0140", "\u0120\u00f0\u0141", "\u012e", "\u00b8", "\u0120\u00f0\u0141\u0134", "\u00b0"],
+      ids: [76460, 224, 62904, 235, 11410, 97, 96, 27623, 235, 27623, 255, 11410, 236, 231, 11410, 247, 237, 27623, 232, 96169, 98, 27623, 223, 27623, 227, 11410, 97, 245, 27623, 228, 62904, 237, 71570, 31643, 64139, 250, 64139, 248, 64139, 245, 64139, 247, 11410, 244, 97, 27623, 236, 62904, 234, 11410, 98, 111, 64139, 103, 26602, 101, 62904, 231, 62904, 222, 64139, 107, 11410, 236, 230, 11410, 247, 230, 11410, 247, 234, 64139, 222, 62904, 229, 62904, 233, 26602, 227, 11410, 236, 223, 11410, 234, 252, 11410, 234, 116, 64139, 108],
+      decoded: "\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u0133", "\u0123", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141\u0133", "\u00b1", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0137", "\u00b5", "\u00e2\u0122\u012f", "\u00e2\u013b", "\u0124", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141", "\u00a7", "\u013b", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00e2\u013b", "\u0124", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u012e", "\u00be", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00e2\u013f\u00a4", "\u00e2\u0122\u012f", "\u00f0\u0141\u0134", "\u012d", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a7", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a6", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0131", "\u00b4", "\u00f3", "\u0142\u0123", "\u00a7", "\u00f3", "\u0142\u0123", "\u00a2", "\u00f3", "\u0142\u0123", "\u00a5", "\u00f3", "\u0142\u0123", "\u00ae", "\u00f3", "\u0142\u0123", "\u00a7", "\u00f3", "\u0142\u0123", "\u00bf", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00e2\u013f\u00a4", "\u00ef\u00b8\u0131", "\u00e2\u0122\u012f", "\u00f0\u0141\u0134", "\u012d", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bc"],
+      ids: [38798, 101, 11410, 97, 245, 62904, 223, 31643, 62904, 109, 9468, 237, 119, 11410, 243, 113, 102470, 17245, 224, 31643, 11410, 100, 247, 9468, 237, 119, 102470, 17245, 224, 62904, 101, 9468, 237, 119, 102470, 9468, 234, 122, 11410, 100, 239, 102470, 9468, 97, 251, 102470, 9468, 100, 239, 62904, 102, 102470, 121643, 102470, 93273, 233, 102470, 9468, 239, 101, 62904, 102, 102470, 9468, 239, 102, 102470, 9468, 239, 100, 102470, 9468, 239, 99, 11410, 100, 239, 9468, 237, 119, 102470, 9468, 97, 251, 102470, 9468, 100, 239, 9468, 237, 119, 11410, 237, 112, 175, 16050, 100, 175, 16050, 95, 175, 16050, 98, 175, 16050, 106, 175, 16050, 100, 175, 16050, 123, 62904, 101, 9468, 237, 119, 102470, 121643, 31643, 102470, 93273, 233, 102470, 9468, 239, 101, 9468, 237, 120],
+      decoded: "\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc",
+    },
+    BPE_SCORES_PRIORITY_1: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_1,
+      tokens: ["grab", "bed"],
+      ids: [59312, 2788],
+      decoded: "grabbed",
+    },
+    BPE_SCORES_PRIORITY_2: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_2,
+      tokens: ["\u0120grabbed"],
+      ids: [30418],
+      decoded: " grabbed",
+    },
+    BPE_SCORES_PRIORITY_3: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_3,
+      tokens: ["\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "\u0120grabbed"],
+      ids: [1881, 30418],
+      decoded: "           grabbed",
+    },
+    NEWLINE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE,
+      tokens: ["\u010a"],
+      ids: [198],
+      decoded: "\n",
+    },
+    NEWLINE_WITH_LEADING_SPACE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_WITH_LEADING_SPACE,
+      tokens: ["\u0120\u010a"],
+      ids: [720],
+      decoded: " \n",
+    },
+    TABS: {
+      text: LLAMA_TEST_STRINGS.TABS,
+      tokens: ["\u0109t", "abs", "\u0109\u0109\u0109", "\u0109out", "\u0120here"],
+      ids: [3324, 3518, 573, 14294, 1618],
+      decoded: "\ttabs\t\t\t\tout here",
+    },
+    NEWLINE_AND_TAB: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_AND_TAB,
+      tokens: ["\u010a\u0109\u010a"],
+      ids: [18108],
+      decoded: "\n\t\n",
+    },
+    CHINESE_LETTER: {
+      text: LLAMA_TEST_STRINGS.CHINESE_LETTER,
+      tokens: ["\u00e9\u0137\u0129"],
+      ids: [104643],
+      decoded: "\u9547",
+    },
+    EMOJIS_1: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_1,
+      tokens: ["\u00f0\u0141", "\u00a6", "\u013b"],
+      ids: [9468, 99, 247],
+      decoded: "\ud83e\udd99",
+    },
+    EMOJIS_2: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_2,
+      tokens: ["\u00f0\u0141", "\u00a6", "\u013b", "\u00ea", "\u013b", "\u012c"],
+      ids: [9468, 99, 247, 166, 247, 232],
+      decoded: "\ud83e\udd99\ua64a",
+    },
+    EMOJIS_3: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_3,
+      tokens: ["\u00ea", "\u013b", "\u012c", "\u00f0\u0141", "\u00a6", "\u013b"],
+      ids: [166, 247, 232, 9468, 99, 247],
+      decoded: "\ua64a\ud83e\udd99",
+    },
+    PARAGRAPH: {
+      text: LLAMA_TEST_STRINGS.PARAGRAPH,
+      tokens: ["The", "\u0120llama", "\u0120(/", "\u00cb", "\u012a", "l", "\u00c9", "\u0133", "\u00cb", "\u0132", "m", "\u00c9\u013b", "/", ";", "\u0120\u00f0\u0141", "\u00a6", "\u013b", "Spanish", "\u0120pronunciation", ":", "\u0120[", "\u00cb", "\u012a", "\u00ca", "\u0130", "ama", "])", "\u0120(", "L", "ama", "\u0120gl", "ama", ")", "\u0120is", "\u0120a", "\u0120domestic", "ated", "\u0120South", "\u0120American", "\u0120camel", "id", ",", "\u0120widely", "\u0120used", "\u0120as", "\u0120a", "\u0120meat", "\u0120and", "\u0120pack", "\u0120animal", "\u0120by", "\u0120And", "ean", "\u0120cultures", "\u0120since", "\u0120the", "\u0120Pre", "-C", "olum", "bian", "\u0120era", ".", "\u0120L", "lam", "as", "\u0120are", "\u0120social", "\u0120animals", "\u0120and", "\u0120live", "\u0120with", "\u0120others", "\u0120as", "\u0120a", "\u0120herd", ".", "\u0120Their", "\u0120wool", "\u0120is", "\u0120soft", "\u0120and", "\u0120contains", "\u0120only", "\u0120a", "\u0120small", "\u0120amount", "\u0120of", "\u0120lan", "olin", ".[", "2", "]", "\u0120L", "lam", "as", "\u0120can", "\u0120learn", "\u0120simple", "\u0120tasks", "\u0120after", "\u0120a", "\u0120few", "\u0120repetitions", ".", "\u0120When", "\u0120using", "\u0120a", "\u0120pack", ",", "\u0120they", "\u0120can", "\u0120carry", "\u0120about", "\u0120", "25", "\u0120to", "\u0120", "30", "%", "\u0120of", "\u0120their", "\u0120body", "\u0120weight", "\u0120for", "\u0120", "8", "\u0120to", "\u0120", "13", "\u0120km", "\u0120(", "5", "\u00e2\u0122\u0135", "8", "\u0120miles", ").[", "3", "]", "\u0120The", "\u0120name", "\u0120llama", "\u0120(", "in", "\u0120the", "\u0120past", "\u0120also", "\u0120spelled", '\u0120"', "lama", '"', "\u0120or", '\u0120"', "gl", "ama", '")', "\u0120was", "\u0120adopted", "\u0120by", "\u0120European", "\u0120settlers", "\u0120from", "\u0120native", "\u0120Per", "uv", "ians", ".[", "4", "]", "\u0120The", "\u0120ancestors", "\u0120of", "\u0120ll", "amas", "\u0120are", "\u0120thought", "\u0120to", "\u0120have", "\u0120originated", "\u0120from", "\u0120the", "\u0120Great", "\u0120Plains", "\u0120of", "\u0120North", "\u0120America", "\u0120about", "\u0120", "40", "\u0120million", "\u0120years", "\u0120ago", ",", "\u0120and", "\u0120subsequently", "\u0120migrated", "\u0120to", "\u0120South", "\u0120America", "\u0120about", "\u0120three", "\u0120million", "\u0120years", "\u0120ago", "\u0120during", "\u0120the", "\u0120Great", "\u0120American", "\u0120Inter", "change", ".", "\u0120By", "\u0120the", "\u0120end", "\u0120of", "\u0120the", "\u0120last", "\u0120ice", "\u0120age", "\u0120(", "10", ",", "000", "\u00e2\u0122\u0135", "12", ",", "000", "\u0120years", "\u0120ago", "),", "\u0120camel", "ids", "\u0120were", "\u0120extinct", "\u0120in", "\u0120North", "\u0120America", ".[", "3", "]", "\u0120As", "\u0120of", "\u0120", "200", "7", ",", "\u0120there", "\u0120were", "\u0120over", "\u0120seven", "\u0120million", "\u0120ll", "amas", "\u0120and", "\u0120al", "pac", "as", "\u0120in", "\u0120South", "\u0120America", "\u0120and", "\u0120over", "\u0120", "158", ",", "000", "\u0120ll", "amas", "\u0120and", "\u0120", "100", ",", "000", "\u00ea", "\u013b", "\u012c", "\u00f0\u0141", "\u00a6", "\u013b", "\u0120al", "pac", "as", ",", "\u0120descended", "\u0120from", "\u0120progen", "itors", "\u0120imported", "\u0120late", "\u0120in", "\u0120the", "\u0120", "20", "th", "\u0120century", ",", "\u0120in", "\u0120the", "\u0120United", "\u0120States", "\u0120and", "\u0120Canada", ".[", "5", "]", "\u0120In", "\u0120A", "ym", "ara", "\u0120mythology", ",", "\u0120ll", "amas", "\u0120are", "\u0120important", "\u0120beings", ".", "\u0120The", "\u0120Heavenly", "\u0120L", "lama", "\u0120is", "\u0120said", "\u0120to", "\u0120drink", "\u0120water", "\u0120from", "\u0120the", "\u0120ocean", "\u0120and", "\u0120ur", "in", "ates", "\u0120as", "\u0120it", "\u0120rains", ".[", "6", "]", "\u0120According", "\u0120to", "\u0120A", "ym", "ara", "\u0120es", "chat", "ology", ",", "\u0120ll", "amas", "\u0120will", "\u0120return", "\u0120to", "\u0120the", "\u0120water", "\u0120springs", "\u0120and", "\u0120l", "ago", "ons", "\u0120where", "\u0120they", "\u0120come", "\u0120from", "\u0120at", "\u0120the", "\u0120end", "\u0120of", "\u0120time", ".[", "6", "]"],
+      ids: [791, 94776, 47325, 135, 230, 75, 133, 239, 135, 238, 76, 99638, 14, 26, 11410, 99, 247, 62897, 71722, 25, 510, 135, 230, 134, 236, 3105, 2526, 320, 43, 3105, 2840, 3105, 8, 374, 264, 13018, 660, 4987, 3778, 50252, 307, 11, 13882, 1511, 439, 264, 13339, 323, 3854, 10065, 555, 1628, 5420, 27833, 2533, 279, 5075, 7813, 1152, 13464, 11639, 13, 445, 24705, 300, 527, 3674, 10099, 323, 3974, 449, 3885, 439, 264, 59213, 13, 11205, 39640, 374, 8579, 323, 5727, 1193, 264, 2678, 3392, 315, 31791, 37737, 8032, 17, 60, 445, 24705, 300, 649, 4048, 4382, 9256, 1306, 264, 2478, 86066, 13, 3277, 1701, 264, 3854, 11, 814, 649, 6920, 922, 220, 914, 311, 220, 966, 4, 315, 872, 2547, 4785, 369, 220, 23, 311, 220, 1032, 13437, 320, 20, 4235, 23, 8931, 94638, 18, 60, 578, 836, 94776, 320, 258, 279, 3347, 1101, 68918, 330, 81101, 1, 477, 330, 6200, 3105, 909, 574, 18306, 555, 7665, 61107, 505, 10068, 3700, 12328, 5493, 8032, 19, 60, 578, 38618, 315, 9507, 29189, 527, 3463, 311, 617, 44853, 505, 279, 8681, 63911, 315, 4892, 5270, 922, 220, 1272, 3610, 1667, 4227, 11, 323, 28520, 73691, 311, 4987, 5270, 922, 2380, 3610, 1667, 4227, 2391, 279, 8681, 3778, 5783, 3455, 13, 3296, 279, 842, 315, 279, 1566, 10054, 4325, 320, 605, 11, 931, 4235, 717, 11, 931, 1667, 4227, 705, 50252, 3447, 1051, 69918, 304, 4892, 5270, 8032, 18, 60, 1666, 315, 220, 1049, 22, 11, 1070, 1051, 927, 8254, 3610, 9507, 29189, 323, 453, 46051, 300, 304, 4987, 5270, 323, 927, 220, 11286, 11, 931, 9507, 29189, 323, 220, 1041, 11, 931, 166, 247, 232, 9468, 99, 247, 453, 46051, 300, 11, 58842, 505, 84360, 12170, 25973, 3389, 304, 279, 220, 508, 339, 9478, 11, 304, 279, 3723, 4273, 323, 7008, 8032, 20, 60, 763, 362, 1631, 5169, 59492, 11, 9507, 29189, 527, 3062, 23837, 13, 578, 88150, 445, 81101, 374, 1071, 311, 7172, 3090, 505, 279, 18435, 323, 4433, 258, 988, 439, 433, 62555, 8032, 21, 60, 10771, 311, 362, 1631, 5169, 1560, 9884, 2508, 11, 9507, 29189, 690, 471, 311, 279, 3090, 42242, 323, 326, 6438, 2439, 1405, 814, 2586, 505, 520, 279, 842, 315, 892, 8032, 21, 60],
+      decoded: 'The llama (/\u02c8l\u0251\u02d0m\u0259/; \ud83e\udd99Spanish pronunciation: [\u02c8\u028eama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era. Llamas are social animals and live with others as a herd. Their wool is soft and contains only a small amount of lanolin.[2] Llamas can learn simple tasks after a few repetitions. When using a pack, they can carry about 25 to 30% of their body weight for 8 to 13 km (5\u20138 miles).[3] The name llama (in the past also spelled "lama" or "glama") was adopted by European settlers from native Peruvians.[4] The ancestors of llamas are thought to have originated from the Great Plains of North America about 40 million years ago, and subsequently migrated to South America about three million years ago during the Great American Interchange. By the end of the last ice age (10,000\u201312,000 years ago), camelids were extinct in North America.[3] As of 2007, there were over seven million llamas and alpacas in South America and over 158,000 llamas and 100,000\ua64a\ud83e\udd99 alpacas, descended from progenitors imported late in the 20th century, in the United States and Canada.[5] In Aymara mythology, llamas are important beings. The Heavenly Llama is said to drink water from the ocean and urinates as it rains.[6] According to Aymara eschatology, llamas will return to the water springs and lagoons where they come from at the end of time.[6]',
+    },
+  },
+
+  // - Sequence PostProcessor
+  // - "ignore_merges": true
+  "Xenova/llama3-tokenizer-new": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u0120are", "\u0120you", "\u0120doing", "?"],
+      ids: [128000, 4438, 527, 499, 3815, 30],
+      decoded: "<|begin_of_text|>How are you doing?",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120should", "'ve", "\u0120done", "\u0120this"],
+      ids: [128000, 2675, 1288, 3077, 2884, 420],
+      decoded: "<|begin_of_text|>You should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["012", "345", "678", "9", "\u0120", "0", "\u0120", "1", "\u0120", "2", "\u0120", "3", "\u0120", "4", "\u0120", "5", "\u0120", "6", "\u0120", "7", "\u0120", "8", "\u0120", "9", "\u0120", "10", "\u0120", "100", "\u0120", "100", "0"],
+      ids: [128000, 11531, 12901, 17458, 24, 220, 15, 220, 16, 220, 17, 220, 18, 220, 19, 220, 20, 220, 21, 220, 22, 220, 23, 220, 24, 220, 605, 220, 1041, 220, 1041, 15],
+      decoded: "<|begin_of_text|>0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u0120", "201", "6", "."],
+      ids: [128000, 791, 2883, 574, 18538, 304, 220, 679, 21, 13],
+      decoded: "<|begin_of_text|>The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [128000, 32, 198, 3358, 11261, 998, 20837, 67, 4708, 67, 315, 11, 649, 956, 13],
+      decoded: "<|begin_of_text|>A\n'll!!to?'d''d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "():\u010a", "\u0109pass"],
+      ids: [128000, 755, 1925, 4019, 42531],
+      decoded: "<|begin_of_text|>def main():\n\tpass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".toString", "();\u010a", "toString", "();"],
+      ids: [128000, 1169, 264, 284, 2909, 5180, 545, 6712, 2178],
+      decoded: "<|begin_of_text|>let a = obj.toString();\ntoString();",
+    },
+    NEWLINES: {
+      text: LLAMA_TEST_STRINGS.NEWLINES,
+      tokens: ["ax", "\u010a", "####\u010a", "boo"],
+      ids: [128000, 710, 198, 71050, 34093],
+      decoded: "<|begin_of_text|>ax\n####\nboo",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9d", ",", "running"],
+      ids: [128000, 1899, 53757, 15433, 11, 28272],
+      decoded: "<|begin_of_text|>UNwant\u00e9d,running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0100", "2", "\u00ef\u00bf\u00bd", "3"],
+      ids: [128000, 16, 188, 17, 5809, 18],
+      decoded: "<|begin_of_text|>1\u00002\ufffd3",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "\u0120World"],
+      ids: [128000, 9906, 4435],
+      decoded: "<|begin_of_text|>Hello World",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "\u0120world"],
+      ids: [128000, 15339, 1917],
+      decoded: "<|begin_of_text|>hello world",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141\u00e6\u00b4\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [128000, 104654, 9554, 89151, 39013, 249, 21043],
+      decoded: "<|begin_of_text|>\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120\u0120", "\u0120leading", "\u0120space"],
+      ids: [128000, 256, 6522, 3634],
+      decoded: "<|begin_of_text|>   leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tr", "ailing", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [128000, 376, 14612, 3634, 262],
+      decoded: "<|begin_of_text|>trailing space   ",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u0120", "\u0120Hello"],
+      ids: [128000, 13347, 220, 22691],
+      decoded: "<|begin_of_text|>Hi  Hello",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2\u00a5", "6", "\u0120\u00e2\u0124", "\u00a3", "7", "\u0120\u00e2\u0124\u00b9", "8", "\u0120\u00e2\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [128000, 1985, 400, 16, 432, 17, 674, 18, 13281, 19, 7083, 20, 72588, 21, 113384, 96, 22, 90891, 23, 113384, 109, 24, 1296],
+      decoded: "<|begin_of_text|>test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "00", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [128000, 40, 11021, 459, 24149, 369, 400, 16, 13, 410, 520, 279, 3637, 13],
+      decoded: "<|begin_of_text|>I bought an apple for $1.00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120\u0120"],
+      ids: [128000, 9514, 1981, 256],
+      decoded: "<|begin_of_text|>you\u2026  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [128000, 9514, 1981, 9421],
+      decoded: "<|begin_of_text|>you\u2026\u00a0\u00a0",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142", "\u00c2\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [128000, 9514, 1981, 4194, 4194, 9514, 1981, 9421],
+      decoded: "<|begin_of_text|>you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120\u00ef\u00bd\u0140", "\u0120edge", "\u0120\u00ef\u00bd\u0140", "\u0120case"],
+      ids: [128000, 906, 2668, 111942, 6964, 111942, 1162],
+      decoded: "<|begin_of_text|>weird \uff5e edge \uff5e case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120\u00e2\u0138", "\u0123", "is", "\u0120\u00e2\u0138", "\u0123", "a", "\u0120\u00e2\u0138", "\u0123", "test", "\u0120\u00e2\u0138", "\u0123", "."],
+      ids: [128000, 10634, 223, 2028, 14860, 223, 285, 14860, 223, 64, 14860, 223, 1985, 14860, 223, 13],
+      decoded: "<|begin_of_text|>\u2581This \u2581is \u2581a \u2581test \u2581.",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u00f0\u0141\u013a", "\u0124", "\u0120\u00f0\u0141\u0133", "\u012f", "\u0120\u00f0\u0141", "\u00a4", "\u00a3", "\u0120\u00f0\u0141\u013a", "\u012f", "\u0120\u00f0\u0141\u013a", "\u0143", "\u0120\u00f0\u0141", "\u0130", "\u012b", "\u0120\u00f0\u0141", "\u013b", "\u0131", "\u0120\u00f0\u0141\u013a", "\u012c", "\u0120\u00f0\u0141\u0136", "\u00a5", "\u0120\u00f0\u0141\u013a", "\u0123", "\u0120\u00f0\u0141\u013a", "\u0127", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u013a", "\u0128", "\u0120\u00f0\u0141\u0133", "\u0131", "\u0120\u00e2\u013f\u00a4", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141\u0134", "\u013e", "\u0120\u00f0\u0141\u0134", "\u013c", "\u0120\u00f0\u0141\u0134", "\u0139", "\u0120\u00f0\u0141\u0134", "\u013b", "\u0120\u00f0\u0141", "\u0138", "\u00a4", "\u0120\u00f0\u0141\u013a", "\u0130", "\u0120\u00f0\u0141\u0133", "\u012e", "\u0120\u00f0\u0141", "\u00a5", "\u00b3", "\u0120\u00f0\u0141\u0134", "\u00aa", "\u0120\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141\u0133", "\u012b", "\u0120\u00f0\u0141\u0133", "\u0122", "\u0120\u00f0\u0141\u0134", "\u00af", "\u0120\u00f0\u0141", "\u0130", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012e", "\u0120\u00f0\u0141\u0134", "\u0122", "\u0120\u00f0\u0141\u0133", "\u0129", "\u0120\u00f0\u0141\u0133", "\u012d", "\u0120\u00e2\u013e", "\u0127", "\u0120\u00f0\u0141", "\u0130", "\u0123", "\u0120\u00f0\u0141", "\u012e", "\u0140", "\u0120\u00f0\u0141", "\u012e", "\u00b8", "\u0120\u00f0\u0141\u0134", "\u00b0"],
+      ids: [128000, 76460, 224, 62904, 235, 11410, 97, 96, 27623, 235, 27623, 255, 11410, 236, 231, 11410, 247, 237, 27623, 232, 96169, 98, 27623, 223, 27623, 227, 11410, 97, 245, 27623, 228, 62904, 237, 71570, 31643, 64139, 250, 64139, 248, 64139, 245, 64139, 247, 11410, 244, 97, 27623, 236, 62904, 234, 11410, 98, 111, 64139, 103, 26602, 101, 62904, 231, 62904, 222, 64139, 107, 11410, 236, 230, 11410, 247, 230, 11410, 247, 234, 64139, 222, 62904, 229, 62904, 233, 26602, 227, 11410, 236, 223, 11410, 234, 252, 11410, 234, 116, 64139, 108],
+      decoded: "<|begin_of_text|>\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u0133", "\u0123", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141\u0133", "\u00b1", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0137", "\u00b5", "\u00e2\u0122\u012f", "\u00e2\u013b", "\u0124", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141", "\u00a7", "\u013b", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00e2\u013b", "\u0124", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u012e", "\u00be", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00e2\u013f\u00a4", "\u00e2\u0122\u012f", "\u00f0\u0141\u0134", "\u012d", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a7", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a6", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0131", "\u00b4", "\u00f3", "\u0142\u0123", "\u00a7", "\u00f3", "\u0142\u0123", "\u00a2", "\u00f3", "\u0142\u0123", "\u00a5", "\u00f3", "\u0142\u0123", "\u00ae", "\u00f3", "\u0142\u0123", "\u00a7", "\u00f3", "\u0142\u0123", "\u00bf", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00e2\u013f\u00a4", "\u00ef\u00b8\u0131", "\u00e2\u0122\u012f", "\u00f0\u0141\u0134", "\u012d", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bc"],
+      ids: [128000, 38798, 101, 11410, 97, 245, 62904, 223, 31643, 62904, 109, 9468, 237, 119, 11410, 243, 113, 102470, 17245, 224, 31643, 11410, 100, 247, 9468, 237, 119, 102470, 17245, 224, 62904, 101, 9468, 237, 119, 102470, 9468, 234, 122, 11410, 100, 239, 102470, 9468, 97, 251, 102470, 9468, 100, 239, 62904, 102, 102470, 121643, 102470, 93273, 233, 102470, 9468, 239, 101, 62904, 102, 102470, 9468, 239, 102, 102470, 9468, 239, 100, 102470, 9468, 239, 99, 11410, 100, 239, 9468, 237, 119, 102470, 9468, 97, 251, 102470, 9468, 100, 239, 9468, 237, 119, 11410, 237, 112, 175, 16050, 100, 175, 16050, 95, 175, 16050, 98, 175, 16050, 106, 175, 16050, 100, 175, 16050, 123, 62904, 101, 9468, 237, 119, 102470, 121643, 31643, 102470, 93273, 233, 102470, 9468, 239, 101, 9468, 237, 120],
+      decoded: "<|begin_of_text|>\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc",
+    },
+    BPE_SCORES_PRIORITY_1: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_1,
+      tokens: ["grab", "bed"],
+      ids: [128000, 59312, 2788],
+      decoded: "<|begin_of_text|>grabbed",
+    },
+    BPE_SCORES_PRIORITY_2: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_2,
+      tokens: ["\u0120grabbed"],
+      ids: [128000, 30418],
+      decoded: "<|begin_of_text|> grabbed",
+    },
+    BPE_SCORES_PRIORITY_3: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_3,
+      tokens: ["\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "\u0120grabbed"],
+      ids: [128000, 1881, 30418],
+      decoded: "<|begin_of_text|>           grabbed",
+    },
+    NEWLINE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE,
+      tokens: ["\u010a"],
+      ids: [128000, 198],
+      decoded: "<|begin_of_text|>\n",
+    },
+    NEWLINE_WITH_LEADING_SPACE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_WITH_LEADING_SPACE,
+      tokens: ["\u0120\u010a"],
+      ids: [128000, 720],
+      decoded: "<|begin_of_text|> \n",
+    },
+    TABS: {
+      text: LLAMA_TEST_STRINGS.TABS,
+      tokens: ["\u0109t", "abs", "\u0109\u0109\u0109", "\u0109out", "\u0120here"],
+      ids: [128000, 3324, 3518, 573, 14294, 1618],
+      decoded: "<|begin_of_text|>\ttabs\t\t\t\tout here",
+    },
+    NEWLINE_AND_TAB: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_AND_TAB,
+      tokens: ["\u010a\u0109\u010a"],
+      ids: [128000, 18108],
+      decoded: "<|begin_of_text|>\n\t\n",
+    },
+    CHINESE_LETTER: {
+      text: LLAMA_TEST_STRINGS.CHINESE_LETTER,
+      tokens: ["\u00e9\u0137\u0129"],
+      ids: [128000, 104643],
+      decoded: "<|begin_of_text|>\u9547",
+    },
+    EMOJIS_1: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_1,
+      tokens: ["\u00f0\u0141", "\u00a6", "\u013b"],
+      ids: [128000, 9468, 99, 247],
+      decoded: "<|begin_of_text|>\ud83e\udd99",
+    },
+    EMOJIS_2: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_2,
+      tokens: ["\u00f0\u0141", "\u00a6", "\u013b", "\u00ea", "\u013b", "\u012c"],
+      ids: [128000, 9468, 99, 247, 166, 247, 232],
+      decoded: "<|begin_of_text|>\ud83e\udd99\ua64a",
+    },
+    EMOJIS_3: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_3,
+      tokens: ["\u00ea", "\u013b", "\u012c", "\u00f0\u0141", "\u00a6", "\u013b"],
+      ids: [128000, 166, 247, 232, 9468, 99, 247],
+      decoded: "<|begin_of_text|>\ua64a\ud83e\udd99",
+    },
+    PARAGRAPH: {
+      text: LLAMA_TEST_STRINGS.PARAGRAPH,
+      tokens: ["The", "\u0120llama", "\u0120(/", "\u00cb", "\u012a", "l", "\u00c9", "\u0133", "\u00cb", "\u0132", "m", "\u00c9\u013b", "/", ";", "\u0120\u00f0\u0141", "\u00a6", "\u013b", "Spanish", "\u0120pronunciation", ":", "\u0120[", "\u00cb", "\u012a", "\u00ca", "\u0130", "ama", "])", "\u0120(", "L", "ama", "\u0120gl", "ama", ")", "\u0120is", "\u0120a", "\u0120domestic", "ated", "\u0120South", "\u0120American", "\u0120camel", "id", ",", "\u0120widely", "\u0120used", "\u0120as", "\u0120a", "\u0120meat", "\u0120and", "\u0120pack", "\u0120animal", "\u0120by", "\u0120And", "ean", "\u0120cultures", "\u0120since", "\u0120the", "\u0120Pre", "-C", "olum", "bian", "\u0120era", ".", "\u0120L", "lam", "as", "\u0120are", "\u0120social", "\u0120animals", "\u0120and", "\u0120live", "\u0120with", "\u0120others", "\u0120as", "\u0120a", "\u0120herd", ".", "\u0120Their", "\u0120wool", "\u0120is", "\u0120soft", "\u0120and", "\u0120contains", "\u0120only", "\u0120a", "\u0120small", "\u0120amount", "\u0120of", "\u0120lan", "olin", ".[", "2", "]", "\u0120L", "lam", "as", "\u0120can", "\u0120learn", "\u0120simple", "\u0120tasks", "\u0120after", "\u0120a", "\u0120few", "\u0120repetitions", ".", "\u0120When", "\u0120using", "\u0120a", "\u0120pack", ",", "\u0120they", "\u0120can", "\u0120carry", "\u0120about", "\u0120", "25", "\u0120to", "\u0120", "30", "%", "\u0120of", "\u0120their", "\u0120body", "\u0120weight", "\u0120for", "\u0120", "8", "\u0120to", "\u0120", "13", "\u0120km", "\u0120(", "5", "\u00e2\u0122\u0135", "8", "\u0120miles", ").[", "3", "]", "\u0120The", "\u0120name", "\u0120llama", "\u0120(", "in", "\u0120the", "\u0120past", "\u0120also", "\u0120spelled", '\u0120"', "lama", '"', "\u0120or", '\u0120"', "gl", "ama", '")', "\u0120was", "\u0120adopted", "\u0120by", "\u0120European", "\u0120settlers", "\u0120from", "\u0120native", "\u0120Per", "uv", "ians", ".[", "4", "]", "\u0120The", "\u0120ancestors", "\u0120of", "\u0120ll", "amas", "\u0120are", "\u0120thought", "\u0120to", "\u0120have", "\u0120originated", "\u0120from", "\u0120the", "\u0120Great", "\u0120Plains", "\u0120of", "\u0120North", "\u0120America", "\u0120about", "\u0120", "40", "\u0120million", "\u0120years", "\u0120ago", ",", "\u0120and", "\u0120subsequently", "\u0120migrated", "\u0120to", "\u0120South", "\u0120America", "\u0120about", "\u0120three", "\u0120million", "\u0120years", "\u0120ago", "\u0120during", "\u0120the", "\u0120Great", "\u0120American", "\u0120Inter", "change", ".", "\u0120By", "\u0120the", "\u0120end", "\u0120of", "\u0120the", "\u0120last", "\u0120ice", "\u0120age", "\u0120(", "10", ",", "000", "\u00e2\u0122\u0135", "12", ",", "000", "\u0120years", "\u0120ago", "),", "\u0120camel", "ids", "\u0120were", "\u0120extinct", "\u0120in", "\u0120North", "\u0120America", ".[", "3", "]", "\u0120As", "\u0120of", "\u0120", "200", "7", ",", "\u0120there", "\u0120were", "\u0120over", "\u0120seven", "\u0120million", "\u0120ll", "amas", "\u0120and", "\u0120al", "pac", "as", "\u0120in", "\u0120South", "\u0120America", "\u0120and", "\u0120over", "\u0120", "158", ",", "000", "\u0120ll", "amas", "\u0120and", "\u0120", "100", ",", "000", "\u00ea", "\u013b", "\u012c", "\u00f0\u0141", "\u00a6", "\u013b", "\u0120al", "pac", "as", ",", "\u0120descended", "\u0120from", "\u0120progen", "itors", "\u0120imported", "\u0120late", "\u0120in", "\u0120the", "\u0120", "20", "th", "\u0120century", ",", "\u0120in", "\u0120the", "\u0120United", "\u0120States", "\u0120and", "\u0120Canada", ".[", "5", "]", "\u0120In", "\u0120A", "ym", "ara", "\u0120mythology", ",", "\u0120ll", "amas", "\u0120are", "\u0120important", "\u0120beings", ".", "\u0120The", "\u0120Heavenly", "\u0120L", "lama", "\u0120is", "\u0120said", "\u0120to", "\u0120drink", "\u0120water", "\u0120from", "\u0120the", "\u0120ocean", "\u0120and", "\u0120ur", "in", "ates", "\u0120as", "\u0120it", "\u0120rains", ".[", "6", "]", "\u0120According", "\u0120to", "\u0120A", "ym", "ara", "\u0120es", "chat", "ology", ",", "\u0120ll", "amas", "\u0120will", "\u0120return", "\u0120to", "\u0120the", "\u0120water", "\u0120springs", "\u0120and", "\u0120l", "ago", "ons", "\u0120where", "\u0120they", "\u0120come", "\u0120from", "\u0120at", "\u0120the", "\u0120end", "\u0120of", "\u0120time", ".[", "6", "]"],
+      ids: [128000, 791, 94776, 47325, 135, 230, 75, 133, 239, 135, 238, 76, 99638, 14, 26, 11410, 99, 247, 62897, 71722, 25, 510, 135, 230, 134, 236, 3105, 2526, 320, 43, 3105, 2840, 3105, 8, 374, 264, 13018, 660, 4987, 3778, 50252, 307, 11, 13882, 1511, 439, 264, 13339, 323, 3854, 10065, 555, 1628, 5420, 27833, 2533, 279, 5075, 7813, 1152, 13464, 11639, 13, 445, 24705, 300, 527, 3674, 10099, 323, 3974, 449, 3885, 439, 264, 59213, 13, 11205, 39640, 374, 8579, 323, 5727, 1193, 264, 2678, 3392, 315, 31791, 37737, 8032, 17, 60, 445, 24705, 300, 649, 4048, 4382, 9256, 1306, 264, 2478, 86066, 13, 3277, 1701, 264, 3854, 11, 814, 649, 6920, 922, 220, 914, 311, 220, 966, 4, 315, 872, 2547, 4785, 369, 220, 23, 311, 220, 1032, 13437, 320, 20, 4235, 23, 8931, 94638, 18, 60, 578, 836, 94776, 320, 258, 279, 3347, 1101, 68918, 330, 81101, 1, 477, 330, 6200, 3105, 909, 574, 18306, 555, 7665, 61107, 505, 10068, 3700, 12328, 5493, 8032, 19, 60, 578, 38618, 315, 9507, 29189, 527, 3463, 311, 617, 44853, 505, 279, 8681, 63911, 315, 4892, 5270, 922, 220, 1272, 3610, 1667, 4227, 11, 323, 28520, 73691, 311, 4987, 5270, 922, 2380, 3610, 1667, 4227, 2391, 279, 8681, 3778, 5783, 3455, 13, 3296, 279, 842, 315, 279, 1566, 10054, 4325, 320, 605, 11, 931, 4235, 717, 11, 931, 1667, 4227, 705, 50252, 3447, 1051, 69918, 304, 4892, 5270, 8032, 18, 60, 1666, 315, 220, 1049, 22, 11, 1070, 1051, 927, 8254, 3610, 9507, 29189, 323, 453, 46051, 300, 304, 4987, 5270, 323, 927, 220, 11286, 11, 931, 9507, 29189, 323, 220, 1041, 11, 931, 166, 247, 232, 9468, 99, 247, 453, 46051, 300, 11, 58842, 505, 84360, 12170, 25973, 3389, 304, 279, 220, 508, 339, 9478, 11, 304, 279, 3723, 4273, 323, 7008, 8032, 20, 60, 763, 362, 1631, 5169, 59492, 11, 9507, 29189, 527, 3062, 23837, 13, 578, 88150, 445, 81101, 374, 1071, 311, 7172, 3090, 505, 279, 18435, 323, 4433, 258, 988, 439, 433, 62555, 8032, 21, 60, 10771, 311, 362, 1631, 5169, 1560, 9884, 2508, 11, 9507, 29189, 690, 471, 311, 279, 3090, 42242, 323, 326, 6438, 2439, 1405, 814, 2586, 505, 520, 279, 842, 315, 892, 8032, 21, 60],
+      decoded: '<|begin_of_text|>The llama (/\u02c8l\u0251\u02d0m\u0259/; \ud83e\udd99Spanish pronunciation: [\u02c8\u028eama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era. Llamas are social animals and live with others as a herd. Their wool is soft and contains only a small amount of lanolin.[2] Llamas can learn simple tasks after a few repetitions. When using a pack, they can carry about 25 to 30% of their body weight for 8 to 13 km (5\u20138 miles).[3] The name llama (in the past also spelled "lama" or "glama") was adopted by European settlers from native Peruvians.[4] The ancestors of llamas are thought to have originated from the Great Plains of North America about 40 million years ago, and subsequently migrated to South America about three million years ago during the Great American Interchange. By the end of the last ice age (10,000\u201312,000 years ago), camelids were extinct in North America.[3] As of 2007, there were over seven million llamas and alpacas in South America and over 158,000 llamas and 100,000\ua64a\ud83e\udd99 alpacas, descended from progenitors imported late in the 20th century, in the United States and Canada.[5] In Aymara mythology, llamas are important beings. The Heavenly Llama is said to drink water from the ocean and urinates as it rains.[6] According to Aymara eschatology, llamas will return to the water springs and lagoons where they come from at the end of time.[6]',
+    },
+  },
+  "Xenova/TinyLLama-v0": {
+    NEWLINES: {
+      text: LLAMA_TEST_STRINGS.NEWLINES,
+      tokens: ["\u2581ax", "<0x0A>", "####", "<0x0A>", "b", "oo"],
+      ids: [1, 9013, 13, 20411, 13, 31842, 2742],
+      decoded: "<s> ax\n####\nboo",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581", "<0xE7>", "<0x94>", "<0x9F>", "<0xE6>", "<0xB4>", "<0xBB>", "<0xE7>", "<0x9A>", "<0x84>", "<0xE7>", "<0x9C>", "<0x9F>", "<0xE8>", "<0xB0>", "<0x9B>", "<0xE6>", "<0x98>", "<0xAF>"],
+      ids: [1, 31822, 234, 151, 162, 233, 183, 190, 234, 157, 135, 234, 159, 162, 235, 179, 158, 233, 155, 178],
+      decoded: "<s> \u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trailing", "\u2581space", "\u2581", "\u2581", "\u2581"],
+      ids: [1, 30174, 2138, 31822, 31822, 31822],
+      decoded: "<s> trailing space   ",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$", "1", "\u2581R", "2", "\u2581#", "3", "\u2581\u20ac", "4", "\u2581\u00a3", "5", "\u2581", "<0xC2>", "<0xA5>", "6", "\u2581", "<0xE2>", "<0x82>", "<0xA3>", "7", "\u2581", "<0xE2>", "<0x82>", "<0xB9>", "8", "\u2581", "<0xE2>", "<0x82>", "<0xB1>", "9", "\u2581test"],
+      ids: [1, 1397, 569, 31853, 360, 31855, 1257, 31878, 9390, 31882, 3922, 31880, 31822, 197, 168, 31887, 31822, 229, 133, 166, 31888, 31822, 229, 133, 188, 31886, 31822, 229, 133, 180, 31877, 1397],
+      decoded: "<s> test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "\u2026", "\u2581", "\u2581"],
+      ids: [1, 365, 31925, 31822, 31822],
+      decoded: "<s> you\u2026  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "\u2026", "\u00a0", "\u00a0"],
+      ids: [1, 365, 31925, 31963, 31963],
+      decoded: "<s> you\u2026\u00a0\u00a0",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "\u2026", "\u00a0", "\u00a0", "you", "\u2026", "\u00a0", "\u00a0"],
+      ids: [1, 365, 31925, 31963, 31963, 7936, 31925, 31963, 31963],
+      decoded: "<s> you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581weird", "\u2581", "<0xEF>", "<0xBD>", "<0x9E>", "\u2581edge", "\u2581", "<0xEF>", "<0xBD>", "<0x9E>", "\u2581case"],
+      ids: [1, 9907, 31822, 242, 192, 161, 5991, 31822, 242, 192, 161, 1372],
+      decoded: "<s> weird \uff5e edge \uff5e case",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x82>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x8D>", "\u2581", "<0xF0>", "<0x9F>", "<0xA4>", "<0xA3>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x8D>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0xAD>", "\u2581", "<0xF0>", "<0x9F>", "<0x8E>", "<0x89>", "\u2581", "<0xF0>", "<0x9F>", "<0x99>", "<0x8F>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x8A>", "\u2581", "<0xF0>", "<0x9F>", "<0x94>", "<0xA5>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x81>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x85>", "\u2581", "<0xF0>", "<0x9F>", "<0xA4>", "<0x97>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x86>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x8F>", "\u2581", "<0xE2>", "<0x9D>", "<0xA4>", "<0xEF>", "<0xB8>", "<0x8F>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0x9C>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0x9A>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0x97>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0x99>", "\u2581", "<0xF0>", "<0x9F>", "<0x96>", "<0xA4>", "\u2581", "<0xF0>", "<0x9F>", "<0x98>", "<0x8E>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x8C>", "\u2581", "<0xF0>", "<0x9F>", "<0xA5>", "<0xB3>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0xAA>", "\u2581", "<0xE2>", "<0x9C>", "<0xA8>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x89>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x80>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0xAF>", "\u2581", "<0xF0>", "<0x9F>", "<0x8E>", "<0x88>", "\u2581", "<0xF0>", "<0x9F>", "<0x99>", "<0x88>", "\u2581", "<0xF0>", "<0x9F>", "<0x99>", "<0x8C>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0x80>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x87>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x8B>", "\u2581", "<0xE2>", "<0x9C>", "<0x85>", "\u2581", "<0xF0>", "<0x9F>", "<0x8E>", "<0x81>", "\u2581", "<0xF0>", "<0x9F>", "<0x8C>", "<0x9E>", "\u2581", "<0xF0>", "<0x9F>", "<0x8C>", "<0xB8>", "\u2581", "<0xF0>", "<0x9F>", "<0x92>", "<0xB0>"],
+      ids: [1, 31822, 243, 162, 155, 133, 31822, 243, 162, 148, 144, 31822, 243, 162, 167, 166, 31822, 243, 162, 155, 144, 31822, 243, 162, 155, 176, 31822, 243, 162, 145, 140, 31822, 243, 162, 156, 146, 31822, 243, 162, 155, 141, 31822, 243, 162, 151, 168, 31822, 243, 162, 155, 132, 31822, 243, 162, 155, 136, 31822, 243, 162, 167, 154, 31822, 243, 162, 155, 137, 31822, 243, 162, 148, 146, 31822, 229, 160, 167, 242, 187, 146, 31822, 243, 162, 149, 159, 31822, 243, 162, 149, 157, 31822, 243, 162, 149, 154, 31822, 243, 162, 149, 156, 31822, 243, 162, 153, 167, 31822, 243, 162, 155, 145, 31822, 243, 162, 148, 143, 31822, 243, 162, 168, 182, 31822, 243, 162, 149, 173, 31822, 229, 159, 171, 31822, 243, 162, 148, 140, 31822, 243, 162, 148, 131, 31822, 243, 162, 149, 178, 31822, 243, 162, 145, 139, 31822, 243, 162, 156, 139, 31822, 243, 162, 156, 143, 31822, 243, 162, 149, 131, 31822, 243, 162, 148, 138, 31822, 243, 162, 148, 142, 31822, 229, 159, 136, 31822, 243, 162, 145, 132, 31822, 243, 162, 143, 161, 31822, 243, 162, 143, 187, 31822, 243, 162, 149, 179],
+      decoded: "<s> \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "<0xE2>", "<0x9C>", "<0xA8>", "\u2581", "<0xF0>", "<0x9F>", "<0xA4>", "<0x97>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0x81>", "<0xEF>", "<0xB8>", "<0x8F>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0xB1>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "\u2581", "<0xF0>", "<0x9F>", "<0x95>", "<0xB5>", "<0xE2>", "<0x80>", "<0x8D>", "<0xE2>", "<0x99>", "<0x82>", "<0xEF>", "<0xB8>", "<0x8F>", "\u2581", "<0xF0>", "<0x9F>", "<0xA7>", "<0x99>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "<0xE2>", "<0x80>", "<0x8D>", "<0xE2>", "<0x99>", "<0x82>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0xA8>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0x8C>", "<0xBE>", "\u2581", "<0xF0>", "<0x9F>", "<0xA7>", "<0x91>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0xA4>", "<0x9D>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0xA7>", "<0x91>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0xA9>", "<0xE2>", "<0x80>", "<0x8D>", "<0xE2>", "<0x9D>", "<0xA4>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0x92>", "<0x8B>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0x91>", "<0xA8>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0xA9>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0x91>", "<0xA9>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0x91>", "<0xA7>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0x91>", "<0xA6>", "\u2581", "<0xF0>", "<0x9F>", "<0xA7>", "<0x91>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0xA4>", "<0x9D>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0xA7>", "<0x91>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "\u2581", "<0xF0>", "<0x9F>", "<0x8F>", "<0xB4>", "<0xF3>", "<0xA0>", "<0x81>", "<0xA7>", "<0xF3>", "<0xA0>", "<0x81>", "<0xA2>", "<0xF3>", "<0xA0>", "<0x81>", "<0xA5>", "<0xF3>", "<0xA0>", "<0x81>", "<0xAE>", "<0xF3>", "<0xA0>", "<0x81>", "<0xA7>", "<0xF3>", "<0xA0>", "<0x81>", "<0xBF>", "\u2581", "<0xF0>", "<0x9F>", "<0x91>", "<0xA8>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBB>", "<0xE2>", "<0x80>", "<0x8D>", "<0xE2>", "<0x9D>", "<0xA4>", "<0xEF>", "<0xB8>", "<0x8F>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0x92>", "<0x8B>", "<0xE2>", "<0x80>", "<0x8D>", "<0xF0>", "<0x9F>", "<0x91>", "<0xA8>", "<0xF0>", "<0x9F>", "<0x8F>", "<0xBC>"],
+      ids: [1, 31822, 229, 159, 171, 31822, 243, 162, 167, 154, 31822, 243, 162, 148, 132, 242, 187, 146, 31822, 243, 162, 148, 180, 243, 162, 146, 190, 31822, 243, 162, 152, 184, 229, 131, 144, 229, 156, 133, 242, 187, 146, 31822, 243, 162, 170, 156, 243, 162, 146, 190, 229, 131, 144, 229, 156, 133, 31822, 243, 162, 148, 171, 243, 162, 146, 190, 229, 131, 144, 243, 162, 143, 193, 31822, 243, 162, 170, 148, 229, 131, 144, 243, 162, 167, 160, 229, 131, 144, 243, 162, 170, 148, 31822, 243, 162, 148, 172, 229, 131, 144, 229, 160, 167, 229, 131, 144, 243, 162, 149, 142, 229, 131, 144, 243, 162, 148, 171, 31822, 243, 162, 148, 172, 229, 131, 144, 243, 162, 148, 172, 229, 131, 144, 243, 162, 148, 170, 229, 131, 144, 243, 162, 148, 169, 31822, 243, 162, 170, 148, 243, 162, 146, 190, 229, 131, 144, 243, 162, 167, 160, 229, 131, 144, 243, 162, 170, 148, 243, 162, 146, 190, 31822, 243, 162, 146, 183, 246, 163, 132, 170, 246, 163, 132, 165, 246, 163, 132, 168, 246, 163, 132, 177, 246, 163, 132, 170, 246, 163, 132, 194, 31822, 243, 162, 148, 171, 243, 162, 146, 190, 229, 131, 144, 229, 160, 167, 242, 187, 146, 229, 131, 144, 243, 162, 149, 142, 229, 131, 144, 243, 162, 148, 171, 243, 162, 146, 191],
+      decoded: "<s> \u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc",
+    },
+    NEWLINE_WITH_LEADING_SPACE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_WITH_LEADING_SPACE,
+      tokens: ["\u2581", "\u2581", "<0x0A>"],
+      ids: [1, 31822, 31822, 13],
+      decoded: "<s>  \n",
+    },
+    CHINESE_LETTER: {
+      text: LLAMA_TEST_STRINGS.CHINESE_LETTER,
+      tokens: ["\u2581", "<0xE9>", "<0x95>", "<0x87>"],
+      ids: [1, 31822, 236, 152, 138],
+      decoded: "<s> \u9547",
+    },
+    PARAGRAPH: {
+      text: LLAMA_TEST_STRINGS.PARAGRAPH,
+      tokens: ["\u2581The", "\u2581ll", "ama", "\u2581(", "/", "<0xCB>", "<0x88>", "l", "<0xC9>", "<0x91>", "<0xCB>", "<0x90>", "m", "<0xC9>", "<0x99>", "/", ";", "\u2581", "<0xF0>", "<0x9F>", "<0xA6>", "<0x99>", "Sp", "anish", "\u2581pron", "unciation", ":", "\u2581[", "<0xCB>", "<0x88>", "<0xCA>", "<0x8E>", "ama", "])", "\u2581(", "L", "ama", "\u2581gl", "ama", ")", "\u2581is", "\u2581a", "\u2581domest", "icated", "\u2581South", "\u2581American", "\u2581cam", "el", "id", ",", "\u2581widely", "\u2581used", "\u2581as", "\u2581a", "\u2581meat", "\u2581and", "\u2581pack", "\u2581animal", "\u2581by", "\u2581And", "ean", "\u2581cultures", "\u2581since", "\u2581the", "\u2581Pre", "-", "Col", "umb", "ian", "\u2581era", ".", "\u2581L", "lam", "as", "\u2581are", "\u2581social", "\u2581animals", "\u2581and", "\u2581live", "\u2581with", "\u2581others", "\u2581as", "\u2581a", "\u2581herd", ".", "\u2581Their", "\u2581wool", "\u2581is", "\u2581soft", "\u2581and", "\u2581contains", "\u2581only", "\u2581a", "\u2581small", "\u2581amount", "\u2581of", "\u2581l", "anol", "in", ".[", "2", "]", "\u2581L", "lam", "as", "\u2581can", "\u2581learn", "\u2581simple", "\u2581", "t", "asks", "\u2581after", "\u2581a", "\u2581few", "\u2581repet", "itions", ".", "\u2581When", "\u2581using", "\u2581a", "\u2581pack", ",", "\u2581they", "\u2581can", "\u2581carry", "\u2581about", "\u2581", "2", "5", "\u2581to", "\u2581", "3", "0", "%", "\u2581of", "\u2581their", "\u2581body", "\u2581weight", "\u2581for", "\u2581", "8", "\u2581to", "\u2581", "1", "3", "\u2581km", "\u2581(", "5", "\u2013", "8", "\u2581miles", ").", "[", "3", "]", "\u2581The", "\u2581name", "\u2581ll", "ama", "\u2581(", "in", "\u2581the", "\u2581past", "\u2581also", "\u2581sp", "elled", '\u2581"', "l", "ama", '"', "\u2581or", '\u2581"', "gl", "ama", '")', "\u2581was", "\u2581adopted", "\u2581by", "\u2581European", "\u2581settlers", "\u2581from", "\u2581native", "\u2581Per", "uv", "ians", ".[", "4", "]", "\u2581The", "\u2581ancestors", "\u2581of", "\u2581l", "lam", "as", "\u2581are", "\u2581thought", "\u2581to", "\u2581have", "\u2581originated", "\u2581from", "\u2581the", "\u2581Great", "\u2581Plains", "\u2581of", "\u2581North", "\u2581America", "\u2581about", "\u2581", "4", "0", "\u2581million", "\u2581years", "\u2581ago", ",", "\u2581and", "\u2581subsequently", "\u2581mig", "rated", "\u2581to", "\u2581South", "\u2581America", "\u2581about", "\u2581three", "\u2581million", "\u2581years", "\u2581ago", "\u2581during", "\u2581the", "\u2581Great", "\u2581American", "\u2581Inter", "change", ".", "\u2581By", "\u2581the", "\u2581end", "\u2581of", "\u2581the", "\u2581last", "\u2581ice", "\u2581age", "\u2581(", "1", "0", ",", "0", "0", "0", "\u2013", "1", "2", ",", "0", "0", "0", "\u2581years", "\u2581ago", "),", "\u2581cam", "el", "ids", "\u2581were", "\u2581extinct", "\u2581in", "\u2581North", "\u2581America", ".[", "3", "]", "\u2581As", "\u2581of", "\u2581", "2", "0", "0", "7", ",", "\u2581there", "\u2581were", "\u2581over", "\u2581seven", "\u2581million", "\u2581l", "lam", "as", "\u2581and", "\u2581al", "p", "ac", "as", "\u2581in", "\u2581South", "\u2581America", "\u2581and", "\u2581over", "\u2581", "1", "5", "8", ",", "0", "0", "0", "\u2581l", "lam", "as", "\u2581and", "\u2581", "1", "0", "0", ",", "0", "0", "0", "<0xEA>", "<0x99>", "<0x8A>", "<0xF0>", "<0x9F>", "<0xA6>", "<0x99>", "\u2581al", "p", "ac", "as", ",", "\u2581descended", "\u2581from", "\u2581pro", "gen", "itors", "\u2581imported", "\u2581late", "\u2581in", "\u2581the", "\u2581", "2", "0", "th", "\u2581century", ",", "\u2581in", "\u2581the", "\u2581United", "\u2581States", "\u2581and", "\u2581Canada", ".[", "5", "]", "\u2581In", "\u2581A", "ym", "ara", "\u2581mythology", ",", "\u2581l", "lam", "as", "\u2581are", "\u2581important", "\u2581beings", ".", "\u2581The", "\u2581Heaven", "ly", "\u2581Ll", "ama", "\u2581is", "\u2581said", "\u2581to", "\u2581drink", "\u2581water", "\u2581from", "\u2581the", "\u2581ocean", "\u2581and", "\u2581ur", "inates", "\u2581as", "\u2581it", "\u2581rains", ".[", "6", "]", "\u2581According", "\u2581to", "\u2581A", "ym", "ara", "\u2581es", "chat", "ology", ",", "\u2581l", "lam", "as", "\u2581will", "\u2581return", "\u2581to", "\u2581the", "\u2581water", "\u2581springs", "\u2581and", "\u2581l", "ago", "ons", "\u2581where", "\u2581they", "\u2581come", "\u2581from", "\u2581at", "\u2581the", "\u2581end", "\u2581of", "\u2581time", ".[", "6", "]"],
+      ids: [1, 347, 31763, 2269, 352, 31873, 206, 139, 31832, 204, 148, 206, 147, 31836, 204, 156, 31873, 31891, 31822, 243, 162, 169, 156, 8889, 5817, 11155, 26128, 31871, 836, 206, 139, 205, 145, 2269, 9772, 352, 31867, 2269, 1192, 2269, 31861, 322, 260, 27940, 2672, 1897, 1454, 3764, 307, 317, 31844, 7055, 1065, 362, 260, 8659, 291, 2667, 6075, 417, 787, 14083, 10775, 1314, 266, 2345, 31854, 4848, 2234, 620, 5998, 31843, 372, 3082, 295, 397, 1619, 5220, 291, 1983, 351, 1892, 362, 260, 27172, 31843, 4585, 22729, 322, 2647, 291, 5140, 744, 260, 1435, 2399, 287, 309, 18426, 261, 3564, 31855, 31908, 372, 3082, 295, 473, 1977, 3102, 31822, 31824, 5577, 768, 260, 1346, 17042, 1479, 31843, 1408, 1340, 260, 2667, 31844, 526, 473, 3875, 562, 31822, 31855, 31880, 289, 31822, 31878, 31852, 31914, 287, 518, 2108, 4182, 329, 31822, 31886, 289, 31822, 31853, 31878, 6512, 352, 31880, 31906, 31886, 4465, 656, 31907, 31878, 31908, 347, 1382, 31763, 2269, 352, 261, 266, 1646, 615, 612, 5902, 495, 31832, 2269, 31875, 405, 495, 4261, 2269, 4290, 393, 7574, 417, 2821, 23343, 427, 6412, 2083, 10099, 1580, 3564, 31882, 31908, 347, 18294, 287, 309, 3082, 295, 397, 1991, 289, 435, 20355, 427, 266, 3172, 26744, 287, 1975, 2139, 562, 31822, 31882, 31852, 1577, 778, 2236, 31844, 291, 11786, 21052, 3397, 289, 1897, 2139, 562, 1166, 1577, 778, 2236, 1177, 266, 3172, 1454, 3029, 3604, 31843, 1433, 266, 928, 287, 266, 1060, 5707, 2253, 352, 31853, 31852, 31844, 31852, 31852, 31852, 31906, 31853, 31855, 31844, 31852, 31852, 31852, 778, 2236, 698, 3764, 307, 1982, 577, 30610, 288, 1975, 2139, 3564, 31878, 31908, 717, 287, 31822, 31855, 31852, 31852, 31888, 31844, 635, 577, 648, 3931, 1577, 309, 3082, 295, 291, 366, 31837, 380, 295, 288, 1897, 2139, 291, 648, 31822, 31853, 31880, 31886, 31844, 31852, 31852, 31852, 309, 3082, 295, 291, 31822, 31853, 31852, 31852, 31844, 31852, 31852, 31852, 237, 156, 141, 243, 162, 169, 156, 366, 31837, 380, 295, 31844, 27627, 427, 375, 3353, 4705, 17798, 2732, 288, 266, 31822, 31855, 31852, 388, 3373, 31844, 288, 266, 1494, 1769, 291, 3008, 3564, 31880, 31908, 455, 308, 1276, 2776, 24143, 31844, 309, 3082, 295, 397, 1480, 11844, 31843, 347, 15836, 326, 11321, 2269, 322, 664, 289, 5065, 1579, 427, 266, 8622, 291, 4328, 11466, 362, 357, 28738, 3564, 31887, 31908, 3252, 289, 308, 1276, 2776, 1582, 20068, 1058, 31844, 309, 3082, 295, 482, 1199, 289, 266, 1579, 24250, 291, 309, 3405, 680, 804, 526, 1412, 427, 389, 266, 928, 287, 647, 3564, 31887, 31908],
+      decoded: '<s> The llama (/\u02c8l\u0251\u02d0m\u0259/; \ud83e\udd99Spanish pronunciation: [\u02c8\u028eama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era. Llamas are social animals and live with others as a herd. Their wool is soft and contains only a small amount of lanolin.[2] Llamas can learn simple tasks after a few repetitions. When using a pack, they can carry about 25 to 30% of their body weight for 8 to 13 km (5\u20138 miles).[3] The name llama (in the past also spelled "lama" or "glama") was adopted by European settlers from native Peruvians.[4] The ancestors of llamas are thought to have originated from the Great Plains of North America about 40 million years ago, and subsequently migrated to South America about three million years ago during the Great American Interchange. By the end of the last ice age (10,000\u201312,000 years ago), camelids were extinct in North America.[3] As of 2007, there were over seven million llamas and alpacas in South America and over 158,000 llamas and 100,000\ua64a\ud83e\udd99 alpacas, descended from progenitors imported late in the 20th century, in the United States and Canada.[5] In Aymara mythology, llamas are important beings. The Heavenly Llama is said to drink water from the ocean and urinates as it rains.[6] According to Aymara eschatology, llamas will return to the water springs and lagoons where they come from at the end of time.[6]',
+    },
+  },
+  "Xenova/deepseek-coder-1.3b-instruct": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u0120are", "\u0120you", "\u0120doing", "?"],
+      ids: [32013, 2808, 417, 340, 3207, 30],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>How are you doing?",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120should", "'", "ve", "\u0120done", "\u0120this"],
+      ids: [32013, 2042, 1020, 6, 312, 2359, 437],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>You should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "\u0120", "0", "\u0120", "1", "\u0120", "2", "\u0120", "3", "\u0120", "4", "\u0120", "5", "\u0120", "6", "\u0120", "7", "\u0120", "8", "\u0120", "9", "\u0120", "1", "0", "\u0120", "1", "0", "0", "\u0120", "1", "0", "0", "0"],
+      ids: [32013, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 207, 15, 207, 16, 207, 17, 207, 18, 207, 19, 207, 20, 207, 21, 207, 22, 207, 23, 207, 24, 207, 16, 15, 207, 16, 15, 15, 207, 16, 15, 15, 15],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u0120", "2", "0", "1", "6", "."],
+      ids: [32013, 546, 2595, 438, 16316, 279, 207, 17, 15, 16, 21, 13],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'", "ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'", "t", "."],
+      ids: [32013, 32, 185, 6, 642, 24466, 577, 11665, 67, 4191, 67, 280, 11, 482, 6, 83, 13],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>A\n'll !!to?'d''d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "():", "\u010a", "\u0109", "pass"],
+      ids: [32013, 1551, 1959, 10942, 185, 184, 4805],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>def main():\n\tpass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".", "toString", "();", "\u010a", "toString", "();"],
+      ids: [32013, 1160, 245, 405, 6528, 13, 12617, 1293, 185, 12617, 1293],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>let a = obj.toString();\ntoString();",
+    },
+    NEWLINES: {
+      text: LLAMA_TEST_STRINGS.NEWLINES,
+      tokens: ["ax", "\u010a", "####", "\u010a", "bo", "o"],
+      ids: [32013, 1099, 185, 3576, 185, 952, 78],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>ax\n####\nboo",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9d", ",", "running"],
+      ids: [32013, 4348, 28626, 31898, 11, 22785],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>UNwant\u00e9d,running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0100", "2", "\u00ef\u00bf\u00bd", "3"],
+      ids: [32013, 16, 175, 17, 10006, 18],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>1\u00002\ufffd3",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "\u0120World"],
+      ids: [32013, 17535, 5414],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>Hello World",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "\u0120world"],
+      ids: [32013, 31702, 1835],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>hello world",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141\u00e6\u00b4\u00bb\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [32013, 23393, 2651, 1534, 236, 502],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120\u0120", "\u0120leading", "\u0120space"],
+      ids: [32013, 243, 5877, 2507],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>   leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "iling", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [32013, 7246, 5964, 2507, 315],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>trailing space   ",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u0120", "\u0120H", "ello"],
+      ids: [32013, 11041, 207, 414, 9489],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>Hi  Hello",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120", "\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2", "\u00a5", "6", "\u0120", "\u00e2\u0124", "\u00a3", "7", "\u0120", "\u00e2\u0124", "\u00b9", "8", "\u0120", "\u00e2\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [32013, 2806, 371, 16, 432, 17, 1494, 18, 207, 11010, 19, 8761, 20, 2688, 98, 21, 207, 7935, 96, 22, 207, 7935, 117, 23, 207, 7935, 109, 24, 1719],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "0", "0", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [32013, 40, 8942, 274, 15902, 327, 371, 16, 13, 15, 15, 429, 254, 4730, 13],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>I bought an apple for $1.00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120\u0120"],
+      ids: [32013, 4209, 2484, 243],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>you\u2026  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [32013, 4209, 2484, 10447],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>you\u2026\u00a0\u00a0",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142", "\u00c2\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [32013, 4209, 2484, 1200, 1200, 4209, 2484, 10447],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120", "\u00ef", "\u00bd", "\u0140", "\u0120edge", "\u0120", "\u00ef", "\u00bd", "\u0140", "\u0120case"],
+      ids: [32013, 828, 2369, 207, 169, 121, 239, 5935, 207, 169, 121, 239, 1452],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>weird \uff5e edge \uff5e case",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u00f0\u0141", "\u013a", "\u0124", "\u0120\u00f0\u0141", "\u0133", "\u012f", "\u0120\u00f0\u0141", "\u00a4", "\u00a3", "\u0120\u00f0\u0141", "\u013a", "\u012f", "\u0120\u00f0\u0141", "\u013a", "\u0143", "\u0120\u00f0\u0141", "\u0130", "\u012b", "\u0120\u00f0\u0141\u013b", "\u0131", "\u0120\u00f0\u0141", "\u013a", "\u012c", "\u0120\u00f0\u0141", "\u0136", "\u00a5", "\u0120\u00f0\u0141", "\u013a", "\u0123", "\u0120\u00f0\u0141", "\u013a", "\u0127", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141", "\u013a", "\u0128", "\u0120\u00f0\u0141", "\u0133", "\u0131", "\u0120", "\u00e2", "\u013f", "\u00a4", "\u00ef", "\u00b8", "\u0131", "\u0120\u00f0\u0141", "\u0134", "\u013e", "\u0120\u00f0\u0141", "\u0134", "\u013c", "\u0120\u00f0\u0141", "\u0134", "\u0139", "\u0120\u00f0\u0141", "\u0134", "\u013b", "\u0120\u00f0\u0141", "\u0138", "\u00a4", "\u0120\u00f0\u0141", "\u013a", "\u0130", "\u0120\u00f0\u0141", "\u0133", "\u012e", "\u0120\u00f0\u0141", "\u00a5", "\u00b3", "\u0120\u00f0\u0141", "\u0134", "\u00aa", "\u0120", "\u00e2", "\u013e", "\u00a8", "\u0120\u00f0\u0141", "\u0133", "\u012b", "\u0120\u00f0\u0141", "\u0133", "\u0122", "\u0120\u00f0\u0141", "\u0134", "\u00af", "\u0120\u00f0\u0141", "\u0130", "\u012a", "\u0120\u00f0\u0141\u013b", "\u012a", "\u0120\u00f0\u0141\u013b", "\u012e", "\u0120\u00f0\u0141", "\u0134", "\u0122", "\u0120\u00f0\u0141", "\u0133", "\u0129", "\u0120\u00f0\u0141", "\u0133", "\u012d", "\u0120", "\u00e2", "\u013e", "\u0127", "\u0120\u00f0\u0141", "\u0130", "\u0123", "\u0120\u00f0\u0141", "\u012e", "\u0140", "\u0120\u00f0\u0141", "\u012e", "\u00b8", "\u0120\u00f0\u0141", "\u0134", "\u00b0"],
+      ids: [32013, 10047, 233, 211, 12394, 226, 222, 12394, 97, 96, 12394, 233, 222, 12394, 233, 242, 12394, 223, 218, 22709, 224, 12394, 233, 219, 12394, 229, 98, 12394, 233, 210, 12394, 233, 214, 12394, 97, 232, 12394, 233, 215, 12394, 226, 224, 207, 156, 238, 97, 169, 116, 224, 12394, 227, 237, 12394, 227, 235, 12394, 227, 232, 12394, 227, 234, 12394, 231, 97, 12394, 233, 223, 12394, 226, 221, 12394, 98, 111, 12394, 227, 103, 207, 156, 237, 101, 12394, 226, 218, 12394, 226, 209, 12394, 227, 107, 12394, 223, 217, 22709, 217, 22709, 221, 12394, 227, 209, 12394, 226, 216, 12394, 226, 220, 207, 156, 237, 214, 12394, 223, 210, 12394, 221, 239, 12394, 221, 116, 12394, 227, 108],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u00e2", "\u013e", "\u00a8", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141", "\u0133", "\u0123", "\u00ef", "\u00b8", "\u0131", "\u0120\u00f0\u0141", "\u0133", "\u00b1", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0137", "\u00b5", "\u00e2\u0122", "\u012f", "\u00e2", "\u013b", "\u0124", "\u00ef", "\u00b8", "\u0131", "\u0120\u00f0\u0141", "\u00a7", "\u013b", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00e2", "\u013b", "\u0124", "\u0120\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u012e", "\u00be", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u0120\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00e2", "\u013f", "\u00a4", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0134", "\u012d", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u0120\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a7", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a6", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0131", "\u00b4", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00a2", "\u00f3", "\u0142", "\u0123", "\u00a5", "\u00f3", "\u0142", "\u0123", "\u00ae", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00bf", "\u0120\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00e2", "\u013f", "\u00a4", "\u00ef", "\u00b8", "\u0131", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0134", "\u012d", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bc"],
+      ids: [32013, 156, 237, 101, 12394, 97, 232, 12394, 226, 210, 169, 116, 224, 12394, 226, 109, 10047, 224, 119, 12394, 230, 113, 350, 222, 156, 234, 211, 169, 116, 224, 12394, 100, 234, 10047, 224, 119, 350, 222, 156, 234, 211, 12394, 226, 101, 10047, 224, 119, 350, 222, 10047, 221, 122, 12394, 100, 226, 350, 222, 10047, 97, 238, 350, 222, 10047, 100, 226, 12394, 226, 102, 350, 222, 156, 238, 97, 350, 222, 10047, 227, 220, 350, 222, 10047, 226, 101, 12394, 226, 102, 350, 222, 10047, 226, 102, 350, 222, 10047, 226, 100, 350, 222, 10047, 226, 99, 12394, 100, 226, 10047, 224, 119, 350, 222, 10047, 97, 238, 350, 222, 10047, 100, 226, 10047, 224, 119, 12394, 224, 112, 173, 241, 210, 100, 173, 241, 210, 95, 173, 241, 210, 98, 173, 241, 210, 106, 173, 241, 210, 100, 173, 241, 210, 123, 12394, 226, 101, 10047, 224, 119, 350, 222, 156, 238, 97, 169, 116, 224, 350, 222, 10047, 227, 220, 350, 222, 10047, 226, 101, 10047, 224, 120],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120", "\u00e2\u0138", "\u0123", "is", "\u0120", "\u00e2\u0138", "\u0123", "a", "\u0120", "\u00e2\u0138", "\u0123", "test", "\u0120", "\u00e2\u0138", "\u0123", "."],
+      ids: [32013, 11028, 210, 1559, 207, 11028, 210, 262, 207, 11028, 210, 64, 207, 11028, 210, 2806, 207, 11028, 210, 13],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>\u2581This \u2581is \u2581a \u2581test \u2581.",
+    },
+    BPE_SCORES_PRIORITY_1: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_1,
+      tokens: ["gr", "ab", "bed"],
+      ids: [32013, 877, 356, 3861],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>grabbed",
+    },
+    BPE_SCORES_PRIORITY_2: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_2,
+      tokens: ["\u0120grab", "bed"],
+      ids: [32013, 14596, 3861],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c> grabbed",
+    },
+    BPE_SCORES_PRIORITY_3: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_3,
+      tokens: ["\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120\u0120", "\u0120grab", "bed"],
+      ids: [32013, 3137, 14596, 3861],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>           grabbed",
+    },
+    NEWLINE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE,
+      tokens: ["\u010a"],
+      ids: [32013, 185],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>\n",
+    },
+    NEWLINE_WITH_LEADING_SPACE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_WITH_LEADING_SPACE,
+      tokens: ["\u0120", "\u010a"],
+      ids: [32013, 207, 185],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c> \n",
+    },
+    TABS: {
+      text: LLAMA_TEST_STRINGS.TABS,
+      tokens: ["\u0109", "tabs", "\u0109\u0109\u0109", "\u0109", "out", "\u0120here"],
+      ids: [32013, 184, 20611, 1749, 184, 406, 1283],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>\ttabs\t\t\t\tout here",
+    },
+    NEWLINE_AND_TAB: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_AND_TAB,
+      tokens: ["\u010a", "\u0109", "\u010a"],
+      ids: [32013, 185, 184, 185],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>\n\t\n",
+    },
+    CHINESE_LETTER: {
+      text: LLAMA_TEST_STRINGS.CHINESE_LETTER,
+      tokens: ["\u00e9\u0137\u0129"],
+      ids: [32013, 6759],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>\u9547",
+    },
+    EMOJIS_1: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_1,
+      tokens: ["\u00f0\u0141", "\u00a6", "\u013b"],
+      ids: [32013, 10047, 99, 234],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>\ud83e\udd99",
+    },
+    EMOJIS_2: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_2,
+      tokens: ["\u00f0\u0141", "\u00a6", "\u013b", "\u00ea", "\u013b", "\u012c"],
+      ids: [32013, 10047, 99, 234, 164, 234, 219],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>\ud83e\udd99\ua64a",
+    },
+    EMOJIS_3: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_3,
+      tokens: ["\u00ea", "\u013b", "\u012c", "\u00f0\u0141", "\u00a6", "\u013b"],
+      ids: [32013, 164, 234, 219, 10047, 99, 234],
+      decoded: "<\uff5cbegin\u2581of\u2581sentence\uff5c>\ua64a\ud83e\udd99",
+    },
+    PARAGRAPH: {
+      text: LLAMA_TEST_STRINGS.PARAGRAPH,
+      tokens: ["The", "\u0120ll", "ama", "\u0120(/", "\u00cb\u012a", "l", "\u00c9", "\u0133", "\u00cb", "\u0132", "m", "\u00c9\u013b", "/", ";", "\u0120\u00f0\u0141", "\u00a6", "\u013b", "Span", "ish", "\u0120pron", "unciation", ":", "\u0120[", "\u00cb\u012a", "\u00ca", "\u0130", "ama", "])", "\u0120(", "L", "ama", "\u0120gl", "ama", ")", "\u0120is", "\u0120a", "\u0120domestic", "ated", "\u0120South", "\u0120American", "\u0120cam", "el", "id", ",", "\u0120widely", "\u0120used", "\u0120as", "\u0120a", "\u0120meat", "\u0120and", "\u0120pack", "\u0120animal", "\u0120by", "\u0120And", "ean", "\u0120cultures", "\u0120since", "\u0120the", "\u0120Pre", "-", "Col", "umb", "ian", "\u0120era", ".", "\u0120L", "lam", "as", "\u0120are", "\u0120social", "\u0120animals", "\u0120and", "\u0120live", "\u0120with", "\u0120others", "\u0120as", "\u0120a", "\u0120her", "d", ".", "\u0120Their", "\u0120wool", "\u0120is", "\u0120soft", "\u0120and", "\u0120contains", "\u0120only", "\u0120a", "\u0120small", "\u0120amount", "\u0120of", "\u0120lan", "ol", "in", ".[", "2", "]", "\u0120L", "lam", "as", "\u0120can", "\u0120learn", "\u0120simple", "\u0120tasks", "\u0120after", "\u0120a", "\u0120few", "\u0120repet", "itions", ".", "\u0120When", "\u0120using", "\u0120a", "\u0120pack", ",", "\u0120they", "\u0120can", "\u0120carry", "\u0120about", "\u0120", "2", "5", "\u0120to", "\u0120", "3", "0", "%", "\u0120of", "\u0120their", "\u0120body", "\u0120weight", "\u0120for", "\u0120", "8", "\u0120to", "\u0120", "1", "3", "\u0120km", "\u0120(", "5", "\u00e2\u0122\u0135", "8", "\u0120miles", ").", "[", "3", "]", "\u0120The", "\u0120name", "\u0120ll", "ama", "\u0120(", "in", "\u0120the", "\u0120past", "\u0120also", "\u0120sp", "elled", '\u0120"', "l", "ama", '"', "\u0120or", '\u0120"', "gl", "ama", '")', "\u0120was", "\u0120adopted", "\u0120by", "\u0120European", "\u0120sett", "lers", "\u0120from", "\u0120native", "\u0120Per", "uv", "ians", ".[", "4", "]", "\u0120The", "\u0120ancest", "ors", "\u0120of", "\u0120llam", "as", "\u0120are", "\u0120thought", "\u0120to", "\u0120have", "\u0120origin", "ated", "\u0120from", "\u0120the", "\u0120Great", "\u0120Pl", "ains", "\u0120of", "\u0120North", "\u0120America", "\u0120about", "\u0120", "4", "0", "\u0120million", "\u0120years", "\u0120ago", ",", "\u0120and", "\u0120subsequently", "\u0120mig", "rated", "\u0120to", "\u0120South", "\u0120America", "\u0120about", "\u0120three", "\u0120million", "\u0120years", "\u0120ago", "\u0120during", "\u0120the", "\u0120Great", "\u0120American", "\u0120Inter", "change", ".", "\u0120By", "\u0120the", "\u0120end", "\u0120of", "\u0120the", "\u0120last", "\u0120ice", "\u0120age", "\u0120(", "1", "0", ",", "0", "0", "0", "\u00e2\u0122\u0135", "1", "2", ",", "0", "0", "0", "\u0120years", "\u0120ago", "),", "\u0120cam", "el", "ids", "\u0120were", "\u0120ext", "inct", "\u0120in", "\u0120North", "\u0120America", ".[", "3", "]", "\u0120As", "\u0120of", "\u0120", "2", "0", "0", "7", ",", "\u0120there", "\u0120were", "\u0120over", "\u0120seven", "\u0120million", "\u0120llam", "as", "\u0120and", "\u0120al", "p", "ac", "as", "\u0120in", "\u0120South", "\u0120America", "\u0120and", "\u0120over", "\u0120", "1", "5", "8", ",", "0", "0", "0", "\u0120llam", "as", "\u0120and", "\u0120", "1", "0", "0", ",", "0", "0", "0", "\u00ea", "\u013b", "\u012c", "\u00f0\u0141", "\u00a6", "\u013b", "\u0120al", "p", "ac", "as", ",", "\u0120desc", "ended", "\u0120from", "\u0120pro", "gen", "itors", "\u0120imported", "\u0120late", "\u0120in", "\u0120the", "\u0120", "2", "0", "th", "\u0120century", ",", "\u0120in", "\u0120the", "\u0120United", "\u0120States", "\u0120and", "\u0120Canada", ".[", "5", "]", "\u0120In", "\u0120A", "ym", "ara", "\u0120myth", "ology", ",", "\u0120llam", "as", "\u0120are", "\u0120important", "\u0120beings", ".", "\u0120The", "\u0120Heaven", "ly", "\u0120Ll", "ama", "\u0120is", "\u0120said", "\u0120to", "\u0120drink", "\u0120water", "\u0120from", "\u0120the", "\u0120ocean", "\u0120and", "\u0120ur", "in", "ates", "\u0120as", "\u0120it", "\u0120ra", "ins", ".[", "6", "]", "\u0120According", "\u0120to", "\u0120A", "ym", "ara", "\u0120es", "chat", "ology", ",", "\u0120llam", "as", "\u0120will", "\u0120return", "\u0120to", "\u0120the", "\u0120water", "\u0120springs", "\u0120and", "\u0120l", "ago", "ons", "\u0120where", "\u0120they", "\u0120come", "\u0120from", "\u0120at", "\u0120the", "\u0120end", "\u0120of", "\u0120time", ".[", "6", "]"],
+      ids: [32013, 546, 1703, 4204, 31905, 31459, 75, 131, 226, 133, 225, 76, 28747, 14, 26, 12394, 99, 234, 20786, 840, 9119, 25307, 25, 821, 31459, 132, 223, 4204, 5589, 334, 43, 4204, 1649, 4204, 8, 317, 245, 13569, 612, 5168, 4115, 4370, 282, 304, 11, 13620, 1219, 372, 245, 12342, 285, 2379, 9542, 457, 1306, 24391, 24783, 1952, 254, 7606, 12, 2608, 4313, 987, 2895, 13, 412, 8265, 281, 417, 3601, 8469, 285, 3516, 365, 3060, 372, 245, 706, 67, 13, 9195, 24547, 317, 2829, 285, 5396, 885, 245, 1752, 3733, 280, 27264, 313, 246, 9469, 17, 60, 412, 8265, 281, 482, 3059, 2966, 9227, 1164, 245, 1853, 15747, 2160, 13, 2463, 1242, 245, 2379, 11, 653, 482, 5642, 782, 207, 17, 20, 276, 207, 18, 15, 4, 280, 699, 3110, 4285, 327, 207, 23, 276, 207, 16, 18, 9004, 334, 20, 887, 23, 6595, 628, 58, 18, 60, 428, 1208, 1703, 4204, 334, 246, 254, 2872, 835, 731, 6679, 440, 75, 4204, 1, 409, 440, 2521, 4204, 2456, 438, 13509, 457, 8717, 6762, 12104, 473, 8118, 3043, 12466, 3091, 9469, 19, 60, 428, 18901, 710, 280, 15410, 281, 417, 2207, 276, 463, 6948, 612, 473, 254, 6984, 2284, 2200, 280, 5216, 6092, 782, 207, 19, 15, 4866, 1547, 4074, 11, 285, 23909, 8290, 9831, 276, 5168, 6092, 782, 1846, 4866, 1547, 4074, 2310, 254, 6984, 4115, 6660, 4865, 13, 3550, 254, 1223, 280, 254, 1554, 9405, 4489, 334, 16, 15, 11, 15, 15, 15, 887, 16, 17, 11, 15, 15, 15, 1547, 4074, 650, 4370, 282, 2929, 773, 1309, 5729, 279, 5216, 6092, 9469, 18, 60, 1725, 280, 207, 17, 15, 15, 22, 11, 741, 773, 851, 7970, 4866, 15410, 281, 285, 360, 79, 305, 281, 279, 5168, 6092, 285, 851, 207, 16, 20, 23, 11, 15, 15, 15, 15410, 281, 285, 207, 16, 15, 15, 11, 15, 15, 15, 164, 234, 219, 10047, 99, 234, 360, 79, 305, 281, 11, 1774, 2611, 473, 381, 4920, 6041, 26357, 5179, 279, 254, 207, 17, 15, 392, 8299, 11, 279, 254, 4783, 5098, 285, 8905, 9469, 20, 60, 680, 338, 1254, 3367, 25157, 2333, 11, 15410, 281, 417, 2364, 22792, 13, 428, 18933, 326, 9140, 4204, 317, 989, 276, 7371, 2345, 473, 254, 15439, 285, 8580, 246, 980, 372, 359, 1809, 1231, 9469, 21, 60, 10068, 276, 338, 1254, 3367, 707, 24570, 2333, 11, 15410, 281, 540, 967, 276, 254, 2345, 30851, 285, 284, 5980, 875, 1064, 653, 1857, 473, 429, 254, 1223, 280, 761, 9469, 21, 60],
+      decoded: '<\uff5cbegin\u2581of\u2581sentence\uff5c>The llama (/\u02c8l\u0251\u02d0m\u0259/; \ud83e\udd99Spanish pronunciation: [\u02c8\u028eama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era. Llamas are social animals and live with others as a herd. Their wool is soft and contains only a small amount of lanolin.[2] Llamas can learn simple tasks after a few repetitions. When using a pack, they can carry about 25 to 30% of their body weight for 8 to 13 km (5\u20138 miles).[3] The name llama (in the past also spelled "lama" or "glama") was adopted by European settlers from native Peruvians.[4] The ancestors of llamas are thought to have originated from the Great Plains of North America about 40 million years ago, and subsequently migrated to South America about three million years ago during the Great American Interchange. By the end of the last ice age (10,000\u201312,000 years ago), camelids were extinct in North America.[3] As of 2007, there were over seven million llamas and alpacas in South America and over 158,000 llamas and 100,000\ua64a\ud83e\udd99 alpacas, descended from progenitors imported late in the 20th century, in the United States and Canada.[5] In Aymara mythology, llamas are important beings. The Heavenly Llama is said to drink water from the ocean and urinates as it rains.[6] According to Aymara eschatology, llamas will return to the water springs and lagoons where they come from at the end of time.[6]',
+    },
+  },
+  "Xenova/tamillama_tiny_30m": {
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u2581The", "\u2581company", "\u2581was", "\u2581found", "ed", "\u2581in", "\u2581", "2", "0", "1", "6", "."],
+      ids: [1, 147, 10984, 139, 949, 78, 198, 31654, 13, 21, 12, 17, 34],
+      decoded: "<s> The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581A", "\n", "'", "ll", "\u2581", "!", "!", "to", "?", "'", "d", "'", "'", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [1, 231, 5, 31, 370, 31654, 31715, 31715, 5140, 31725, 31, 31679, 31, 31, 31679, 251, 35, 645, 31, 31665, 34],
+      decoded: "<s> A\n'll !!to?'d''d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u2581def", "\u2581main", "(", ")", ":", "\n", "<unk>", "p", "ass"],
+      ids: [1, 12849, 17375, 32, 33, 29, 5, 0, 31694, 1917],
+      decoded: "<s> def main():\n<unk>pass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u2581let", "\u2581a", "\u2581", "=", "\u2581ob", "j", ".", "to", "St", "ring", "(", ")", ";", "\n", "to", "St", "ring", "(", ")", ";"],
+      ids: [1, 1996, 48, 31654, 25, 4083, 31733, 34, 5140, 23417, 6631, 32, 33, 30, 5, 5140, 23417, 6631, 32, 33, 30],
+      decoded: "<s> let a = obj.toString();\ntoString();",
+    },
+    NEWLINES: {
+      text: LLAMA_TEST_STRINGS.NEWLINES,
+      tokens: ["\u2581ax", "\n", "#", "#", "#", "#", "\n", "boo"],
+      ids: [1, 11441, 5, 22, 22, 22, 22, 5, 21260],
+      decoded: "<s> ax\n####\nboo",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u2581U", "N", "w", "ant", "\u00e9", "d", ",", "r", "un", "ning"],
+      ids: [1, 5841, 31748, 31689, 1027, 31771, 31679, 35, 31678, 367, 1855],
+      decoded: "<s> UNwant\u00e9d,running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["\u2581", "1", "<unk>", "2", "<unk>", "3"],
+      ids: [1, 31654, 12, 0, 13, 0, 14],
+      decoded: "<s> 1<unk>2<unk>3",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["\u2581H", "ello", "\u2581World"],
+      ids: [1, 207, 3589, 25544],
+      decoded: "<s> Hello World",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581", "<unk>"],
+      ids: [1, 31654, 0],
+      decoded: "<s> <unk>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581", "\u2581", "\u2581", "\u2581leading", "\u2581space"],
+      ids: [1, 31654, 31654, 31654, 7951, 7259],
+      decoded: "<s>    leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581tra", "iling", "\u2581space", "\u2581", "\u2581", "\u2581"],
+      ids: [1, 2036, 9850, 7259, 31654, 31654, 31654],
+      decoded: "<s> trailing space   ",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["\u2581H", "i", "\u2581", "\u2581H", "ello"],
+      ids: [1, 207, 31673, 31654, 207, 3589],
+      decoded: "<s> Hi  Hello",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581", "$", "1", "\u2581R", "2", "\u2581", "#", "3", "\u2581", "\u20ac", "4", "\u2581", "\u00a3", "5", "\u2581", "<unk>", "6", "\u2581", "<unk>", "7", "\u2581", "\u20b9", "8", "\u2581", "<unk>", "9", "\u2581test"],
+      ids: [1, 6370, 31654, 9, 12, 947, 13, 31654, 22, 14, 31654, 31746, 15, 31654, 31792, 16, 31654, 0, 17, 31654, 0, 18, 31654, 31999, 19, 31654, 0, 20, 6370],
+      decoded: "<s> test $1 R2 #3 \u20ac4 \u00a35 <unk>6 <unk>7 \u20b98 <unk>9 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581I", "\u2581bought", "\u2581an", "\u2581apple", "\u2581for", "\u2581", "$", "1", ".", "0", "0", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [1, 320, 4685, 446, 4223, 347, 31654, 9, 12, 34, 21, 21, 586, 70, 2023, 34],
+      decoded: "<s> I bought an apple for $1.00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "<unk>", "\u2581", "\u2581"],
+      ids: [1, 356, 0, 31654, 31654],
+      decoded: "<s> you<unk>  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "<unk>"],
+      ids: [1, 356, 0],
+      decoded: "<s> you<unk>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "<unk>", "you", "<unk>"],
+      ids: [1, 356, 0, 21984, 0],
+      decoded: "<s> you<unk>you<unk>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581weird", "\u2581", "<unk>", "\u2581edge", "\u2581", "<unk>", "\u2581case"],
+      ids: [1, 7865, 31654, 0, 11148, 31654, 0, 10143],
+      decoded: "<s> weird <unk> edge <unk> case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581", "\u2581This", "\u2581", "\u2581is", "\u2581", "\u2581a", "\u2581", "\u2581test", "\u2581", "\u2581", "."],
+      ids: [1, 31654, 3827, 31654, 344, 31654, 48, 31654, 6370, 31654, 31654, 34],
+      decoded: "<s>  This  is  a  test  .",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>"],
+      ids: [1, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0],
+      decoded: "<s> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u200d", "<unk>", "\u2581", "<unk>", "\u200d", "<unk>", "\u2581", "<unk>", "\u200d", "<unk>", "\u2581", "<unk>", "\u200d", "<unk>", "\u200d", "<unk>", "\u2581", "<unk>", "\u200d", "<unk>", "\u200d", "<unk>", "\u200d", "<unk>", "\u2581", "<unk>", "\u200d", "<unk>", "\u200d", "<unk>", "\u200d", "<unk>", "\u2581", "<unk>", "\u200d", "<unk>", "\u200d", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u200d", "<unk>", "\u200d", "<unk>", "\u200d", "<unk>"],
+      ids: [1, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31654, 0, 31928, 0, 31654, 0, 31928, 0, 31654, 0, 31928, 0, 31654, 0, 31928, 0, 31928, 0, 31654, 0, 31928, 0, 31928, 0, 31928, 0, 31654, 0, 31928, 0, 31928, 0, 31928, 0, 31654, 0, 31928, 0, 31928, 0, 31654, 0, 31654, 0, 31928, 0, 31928, 0, 31928, 0],
+      decoded: "<s> <unk> <unk> <unk> <unk> <unk>\u200d<unk> <unk>\u200d<unk> <unk>\u200d<unk> <unk>\u200d<unk>\u200d<unk> <unk>\u200d<unk>\u200d<unk>\u200d<unk> <unk>\u200d<unk>\u200d<unk>\u200d<unk> <unk>\u200d<unk>\u200d<unk> <unk> <unk>\u200d<unk>\u200d<unk>\u200d<unk>",
+    },
+    BPE_SCORES_PRIORITY_1: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_1,
+      tokens: ["\u2581grabbed"],
+      ids: [1, 3618],
+      decoded: "<s> grabbed",
+    },
+    BPE_SCORES_PRIORITY_2: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_2,
+      tokens: ["\u2581", "\u2581grabbed"],
+      ids: [1, 31654, 3618],
+      decoded: "<s>  grabbed",
+    },
+    BPE_SCORES_PRIORITY_3: {
+      text: LLAMA_TEST_STRINGS.BPE_SCORES_PRIORITY_3,
+      tokens: ["\u2581", "\u2581", "\u2581", "\u2581", "\u2581", "\u2581", "\u2581", "\u2581", "\u2581", "\u2581", "\u2581", "\u2581grabbed"],
+      ids: [1, 31654, 31654, 31654, 31654, 31654, 31654, 31654, 31654, 31654, 31654, 31654, 3618],
+      decoded: "<s>            grabbed",
+    },
+    NEWLINE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE,
+      tokens: ["\u2581", "\n"],
+      ids: [1, 31654, 5],
+      decoded: "<s> \n",
+    },
+    NEWLINE_WITH_LEADING_SPACE: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_WITH_LEADING_SPACE,
+      tokens: ["\u2581", "\u2581", "\n"],
+      ids: [1, 31654, 31654, 5],
+      decoded: "<s>  \n",
+    },
+    TABS: {
+      text: LLAMA_TEST_STRINGS.TABS,
+      tokens: ["\u2581", "<unk>", "t", "ab", "s", "<unk>", "out", "\u2581here"],
+      ids: [1, 31654, 0, 31665, 878, 31675, 0, 415, 3278],
+      decoded: "<s> <unk>tabs<unk>out here",
+    },
+    NEWLINE_AND_TAB: {
+      text: LLAMA_TEST_STRINGS.NEWLINE_AND_TAB,
+      tokens: ["\u2581", "\n", "<unk>", "\n"],
+      ids: [1, 31654, 5, 0, 5],
+      decoded: "<s> \n<unk>\n",
+    },
+    CHINESE_LETTER: {
+      text: LLAMA_TEST_STRINGS.CHINESE_LETTER,
+      tokens: ["\u2581", "<unk>"],
+      ids: [1, 31654, 0],
+      decoded: "<s> <unk>",
+    },
+    EMOJIS_1: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_1,
+      tokens: ["\u2581", "<unk>"],
+      ids: [1, 31654, 0],
+      decoded: "<s> <unk>",
+    },
+    EMOJIS_2: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_2,
+      tokens: ["\u2581", "<unk>"],
+      ids: [1, 31654, 0],
+      decoded: "<s> <unk>",
+    },
+    EMOJIS_3: {
+      text: LLAMA_TEST_STRINGS.EMOJIS_3,
+      tokens: ["\u2581", "<unk>"],
+      ids: [1, 31654, 0],
+      decoded: "<s> <unk>",
+    },
+    PARAGRAPH: {
+      text: LLAMA_TEST_STRINGS.PARAGRAPH,
+      tokens: ["\u2581The", "\u2581l", "l", "ama", "\u2581", "(", "/", "\u02c8", "l", "\u0251", "\u02d0", "m", "\u0259", "/", ";", "\u2581", "<unk>", "Sp", "an", "ish", "\u2581pr", "on", "un", "ci", "ation", ":", "\u2581", "[", "\u02c8", "<unk>", "ama", "]", ")", "\u2581", "(", "L", "ama", "\u2581gl", "ama", ")", "\u2581is", "\u2581a", "\u2581d", "om", "est", "ic", "ated", "\u2581South", "\u2581American", "\u2581cam", "el", "id", ",", "\u2581wid", "ely", "\u2581used", "\u2581as", "\u2581a", "\u2581meat", "\u2581and", "\u2581pack", "\u2581animal", "\u2581by", "\u2581And", "e", "an", "\u2581c", "ult", "ures", "\u2581since", "\u2581the", "\u2581P", "re", "-", "C", "ol", "umb", "ian", "\u2581", "era", ".", "\u2581L", "l", "am", "as", "\u2581are", "\u2581social", "\u2581animals", "\u2581and", "\u2581live", "\u2581with", "\u2581others", "\u2581as", "\u2581a", "\u2581her", "d", ".", "\u2581Their", "\u2581wool", "\u2581is", "\u2581soft", "\u2581and", "\u2581contains", "\u2581only", "\u2581a", "\u2581small", "\u2581amount", "\u2581of", "\u2581l", "an", "ol", "in", ".", "[", "2", "]", "\u2581L", "l", "am", "as", "\u2581can", "\u2581learn", "\u2581simple", "\u2581tasks", "\u2581after", "\u2581a", "\u2581few", "\u2581rep", "et", "itions", ".", "\u2581When", "\u2581using", "\u2581a", "\u2581pack", ",", "\u2581they", "\u2581can", "\u2581carry", "\u2581about", "\u2581", "2", "5", "\u2581to", "\u2581", "3", "0", "%", "\u2581of", "\u2581their", "\u2581body", "\u2581weight", "\u2581for", "\u2581", "8", "\u2581to", "\u2581", "1", "3", "\u2581km", "\u2581", "(", "5", "\u2013", "8", "\u2581miles", ")", ".", "[", "3", "]", "\u2581The", "\u2581name", "\u2581l", "l", "ama", "\u2581", "(", "in", "\u2581the", "\u2581past", "\u2581also", "\u2581spell", "ed", '\u2581"', "l", "ama", '"', "\u2581or", '\u2581"', "gl", "ama", '"', ")", "\u2581was", "\u2581adop", "ted", "\u2581by", "\u2581E", "urope", "an", "\u2581sett", "l", "ers", "\u2581from", "\u2581n", "ative", "\u2581Per", "u", "v", "ians", ".", "[", "4", "]", "\u2581The", "\u2581an", "c", "est", "ors", "\u2581of", "\u2581l", "l", "am", "as", "\u2581are", "\u2581thought", "\u2581to", "\u2581have", "\u2581origin", "ated", "\u2581from", "\u2581the", "\u2581Great", "\u2581Pl", "ain", "s", "\u2581of", "\u2581North", "\u2581America", "\u2581about", "\u2581", "4", "0", "\u2581million", "\u2581years", "\u2581ago", ",", "\u2581and", "\u2581sub", "sequ", "ently", "\u2581m", "ig", "r", "ated", "\u2581to", "\u2581South", "\u2581America", "\u2581about", "\u2581three", "\u2581million", "\u2581years", "\u2581ago", "\u2581during", "\u2581the", "\u2581Great", "\u2581American", "\u2581Int", "er", "ch", "ange", ".", "\u2581By", "\u2581the", "\u2581end", "\u2581of", "\u2581the", "\u2581last", "\u2581ice", "\u2581age", "\u2581", "(", "1", "0", ",", "0", "0", "0", "\u2013", "1", "2", ",", "0", "0", "0", "\u2581years", "\u2581ago", ")", ",", "\u2581cam", "el", "ids", "\u2581were", "\u2581ext", "inct", "\u2581in", "\u2581North", "\u2581America", ".", "[", "3", "]", "\u2581As", "\u2581of", "\u2581", "2", "0", "0", "7", ",", "\u2581there", "\u2581were", "\u2581over", "\u2581seven", "\u2581million", "\u2581l", "l", "am", "as", "\u2581and", "\u2581al", "p", "ac", "as", "\u2581in", "\u2581South", "\u2581America", "\u2581and", "\u2581over", "\u2581", "1", "5", "8", ",", "0", "0", "0", "\u2581l", "l", "am", "as", "\u2581and", "\u2581", "1", "0", "0", ",", "0", "0", "0", "<unk>", "\u2581al", "p", "ac", "as", ",", "\u2581des", "ce", "nd", "ed", "\u2581from", "\u2581pro", "gen", "it", "ors", "\u2581import", "ed", "\u2581late", "\u2581in", "\u2581the", "\u2581", "2", "0", "th", "\u2581cent", "ury", ",", "\u2581in", "\u2581the", "\u2581United", "\u2581States", "\u2581and", "\u2581Can", "ada", ".", "[", "5", "]", "\u2581In", "\u2581A", "ym", "ara", "\u2581my", "th", "ology", ",", "\u2581l", "l", "am", "as", "\u2581are", "\u2581important", "\u2581be", "ings", ".", "\u2581The", "\u2581He", "aven", "ly", "\u2581L", "l", "ama", "\u2581is", "\u2581said", "\u2581to", "\u2581drink", "\u2581water", "\u2581from", "\u2581the", "\u2581ocean", "\u2581and", "\u2581ur", "in", "ates", "\u2581as", "\u2581it", "\u2581rains", ".", "[", "6", "]", "\u2581Acc", "ord", "ing", "\u2581to", "\u2581A", "ym", "ara", "\u2581es", "ch", "at", "ology", ",", "\u2581l", "l", "am", "as", "\u2581will", "\u2581return", "\u2581to", "\u2581the", "\u2581water", "\u2581spr", "ings", "\u2581and", "\u2581l", "ag", "oons", "\u2581where", "\u2581they", "\u2581come", "\u2581from", "\u2581at", "\u2581the", "\u2581end", "\u2581of", "\u2581time", ".", "[", "6", "]"],
+      ids: [1, 147, 105, 31683, 4464, 31654, 32, 31753, 31774, 31683, 31813, 31779, 31687, 31781, 31753, 30, 31654, 0, 30106, 142, 531, 1823, 111, 367, 8762, 633, 29, 31654, 31778, 31774, 0, 4464, 31780, 33, 31654, 32, 31717, 4464, 1861, 4464, 33, 344, 48, 108, 120, 504, 515, 3062, 29052, 18424, 8829, 256, 153, 35, 20517, 2001, 2680, 488, 48, 9910, 83, 4314, 1448, 1015, 1736, 31660, 142, 103, 3441, 605, 13397, 70, 1629, 86, 7, 31739, 819, 4618, 1685, 31654, 7129, 34, 218, 31683, 235, 691, 617, 23632, 1707, 83, 5860, 249, 2905, 488, 48, 192, 31679, 34, 5290, 11964, 344, 3077, 83, 12959, 2859, 48, 1388, 7238, 251, 105, 142, 819, 81, 34, 31778, 13, 31780, 218, 31683, 235, 691, 645, 907, 16188, 22936, 1609, 48, 4505, 4706, 183, 29049, 34, 1354, 5247, 48, 4314, 35, 338, 645, 4923, 1096, 31654, 13, 16, 84, 31654, 14, 21, 10, 251, 626, 6011, 9152, 347, 31654, 19, 84, 31654, 12, 14, 29496, 31654, 32, 16, 31760, 19, 7843, 33, 34, 31778, 14, 31780, 147, 3516, 105, 31683, 4464, 31654, 32, 81, 70, 4829, 2320, 9948, 78, 245, 31683, 4464, 31690, 1187, 245, 686, 4464, 31690, 33, 139, 25228, 2490, 1015, 465, 25799, 142, 16405, 31683, 983, 825, 152, 12724, 24466, 31688, 31711, 26361, 34, 31778, 15, 31780, 147, 446, 31692, 504, 4166, 251, 105, 31683, 235, 691, 617, 1302, 84, 649, 7206, 3062, 825, 70, 27718, 12966, 588, 31675, 251, 26698, 27393, 1096, 31654, 15, 21, 23109, 3514, 17246, 35, 83, 5097, 17541, 19560, 114, 258, 31678, 3062, 84, 29052, 27393, 1096, 2765, 23109, 3514, 17246, 5823, 70, 27718, 18424, 25473, 98, 345, 3292, 34, 15498, 70, 1645, 251, 70, 6103, 2802, 13463, 31654, 32, 12, 21, 35, 21, 21, 21, 31760, 12, 13, 35, 21, 21, 21, 3514, 17246, 33, 35, 8829, 256, 16185, 579, 7522, 21465, 198, 26698, 27393, 34, 31778, 14, 31780, 1822, 251, 31654, 13, 21, 21, 18, 35, 478, 579, 1407, 20358, 23109, 105, 31683, 235, 691, 83, 789, 31694, 1324, 691, 198, 29052, 27393, 83, 1407, 31654, 12, 16, 19, 35, 21, 21, 21, 105, 31683, 235, 691, 83, 31654, 12, 21, 21, 35, 21, 21, 21, 0, 789, 31694, 1324, 691, 35, 3601, 215, 65, 78, 825, 2482, 8170, 93, 4166, 1777, 78, 5359, 198, 70, 31654, 13, 21, 1671, 11823, 11325, 35, 198, 70, 17562, 18843, 83, 3226, 19507, 34, 31778, 16, 31780, 2266, 231, 10586, 1362, 1286, 1671, 25316, 35, 105, 31683, 235, 691, 617, 2288, 233, 826, 34, 147, 264, 21794, 321, 218, 31683, 4464, 344, 309, 84, 4057, 1357, 825, 70, 5187, 83, 9947, 81, 4897, 488, 182, 24761, 34, 31778, 17, 31780, 28616, 4173, 127, 84, 231, 10586, 1362, 4469, 345, 122, 25316, 35, 105, 31683, 235, 691, 1214, 3520, 84, 70, 1357, 12312, 826, 83, 105, 762, 31431, 1930, 338, 1909, 825, 586, 70, 1645, 251, 470, 34, 31778, 17, 31780],
+      decoded: '<s> The llama (/\u02c8l\u0251\u02d0m\u0259/; <unk>Spanish pronunciation: [\u02c8<unk>ama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era. Llamas are social animals and live with others as a herd. Their wool is soft and contains only a small amount of lanolin.[2] Llamas can learn simple tasks after a few repetitions. When using a pack, they can carry about 25 to 30% of their body weight for 8 to 13 km (5\u20138 miles).[3] The name llama (in the past also spelled "lama" or "glama") was adopted by European settlers from native Peruvians.[4] The ancestors of llamas are thought to have originated from the Great Plains of North America about 40 million years ago, and subsequently migrated to South America about three million years ago during the Great American Interchange. By the end of the last ice age (10,000\u201312,000 years ago), camelids were extinct in North America.[3] As of 2007, there were over seven million llamas and alpacas in South America and over 158,000 llamas and 100,000<unk> alpacas, descended from progenitors imported late in the 20th century, in the United States and Canada.[5] In Aymara mythology, llamas are important beings. The Heavenly Llama is said to drink water from the ocean and urinates as it rains.[6] According to Aymara eschatology, llamas will return to the water springs and lagoons where they come from at the end of time.[6]',
+    },
+  },
+};
+
+const MAX_EXECUTION_TIME = 10_000;
+export const CUSTOM_TESTS = () => {
+  // Tests to ensure that no matter what, the correct tokenization is returned.
+  // This is necessary since there are sometimes bugs in the transformers library.
+  describe("hard-coded", () => {
+    const TESTS = {
+      "Xenova/llama-tokenizer": [
+        // Test legacy compatibility
+        {
+          // legacy unset => legacy=true
+          // NOTE: While incorrect, it is necessary to match legacy behaviour
+          data: {
+            "<s>\n": [1, 29871, 13],
+          },
+          legacy: null,
+        },
+        {
+          // override legacy=true (same results as above)
+          data: {
+            "<s>\n": [1, 29871, 13],
+          },
+          legacy: true,
+        },
+        {
+          // override legacy=false (fixed results)
+          data: {
+            "<s>\n": [1, 13],
+          },
+          legacy: false,
+        },
+      ],
+
+      "Xenova/llama-tokenizer_new": [
+        // legacy=false
+        {
+          data: {
+            " </s> 1  2   3    4   ": [259, 2, 29871, 29896, 259, 29906, 1678, 29941, 268, 29946, 1678],
+            "<s>\n": [1, 13],
+            "</s>test</s>": [2, 1688, 2],
+            " </s> test </s> ": [259, 2, 1243, 29871, 2, 29871],
+            "A\n'll": [319, 13, 29915, 645],
+            "Hey </s>. how are you": [18637, 29871, 2, 29889, 920, 526, 366],
+            "  Hi  Hello  ": [259, 6324, 29871, 15043, 259],
+          },
+          reversible: true,
+          legacy: null,
+        },
+        {
+          // override legacy=true (incorrect results, but necessary to match legacy behaviour)
+          data: {
+            "<s>\n": [1, 29871, 13],
+          },
+          legacy: true,
+        },
+      ],
+
+      // new serialization format (tokenizers >= 0.20.0)
+      // BPE merges are now [string, string][] instead of string[]
+      "Xenova/Llama-3.2-Tokenizer": [
+        {
+          data: {
+            "hello world": [15339, 1917],
+            " belirtilen": [120909],
+          },
+          reversible: true,
+        },
+
+        // Test ignore_merges=false
+        {
+          data: {
+            "hello world": [15339, 1917],
+            " belirtilen": [101664, 1678, 268],
+          },
+          reversible: true,
+          override: (tokenizer) => {
+            tokenizer.model.ignore_merges = false;
+          },
+        },
+      ],
+    };
+
+    // Re-use the same tests for the llama2 tokenizer
+    TESTS["Xenova/llama2-tokenizer"] = TESTS["Xenova/llama-tokenizer_new"];
+
+    for (const [tokenizerName, test_data] of Object.entries(TESTS)) {
+      it(
+        tokenizerName,
+        async () => {
+          for (const { data, reversible, legacy, override } of test_data) {
+            const tokenizer = await LlamaTokenizer.from_pretrained(tokenizerName, { legacy });
+            if (override) {
+              override(tokenizer);
+            }
+            for (const [text, expected] of Object.entries(data)) {
+              const token_ids = tokenizer.encode(text, { add_special_tokens: false });
+              expect(token_ids).toEqual(expected);
+
+              // If reversible, test that decoding produces the original text
+              if (reversible) {
+                const decoded = tokenizer.decode(token_ids);
+                expect(decoded).toEqual(text);
+              }
+            }
+          }
+        },
+        MAX_EXECUTION_TIME,
+      );
+    }
+  });
+};
diff --git a/tests/models/m2m_100/tokenization.js b/tests/models/m2m_100/tokenization.js
new file mode 100644
index 000000000..6af51f44f
--- /dev/null
+++ b/tests/models/m2m_100/tokenization.js
@@ -0,0 +1,179 @@
+import { M2M100Tokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, M2M_100_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = M2M100Tokenizer;
+
+// NOTE: The slow tokenizer (used by transformers) has minor inconsistencies against the fast tokenizer.
+// For this reason, we may override the expected results for certain tests.
+export const TEST_CONFIG = {
+  "Xenova/m2m100_418M": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["\u2581How", "\u2581are", "\u2581you", "\u2581doing", "?"],
+      ids: [128022, 34226, 4234, 8251, 123047, 24, 2],
+      decoded: "__en__ How are you doing?</s>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["\u2581You", "\u2581should", "'", "ve", "\u2581done", "\u2581this"],
+      ids: [128022, 14921, 119092, 12, 470, 111108, 15911, 2],
+      decoded: "__en__ You should've done this</s>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["\u25810", "123", "45", "6", "78", "9", "\u25810", "\u25811", "\u25812", "\u25813", "\u25814", "\u25815", "\u25816", "\u25817", "\u25818", "\u25819", "\u258110", "\u2581100", "\u25811000"],
+      ids: [128022, 847, 78596, 3834, 435, 7049, 718, 847, 161, 168, 205, 273, 265, 376, 442, 455, 572, 301, 1245, 7336, 2],
+      decoded: "__en__ 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000</s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u2581The", "\u2581company", "\u2581was", "\u2581found", "ed", "\u2581in", "\u25812016."],
+      ids: [128022, 1658, 66486, 1513, 118728, 241, 28, 8860, 2],
+      decoded: "__en__ The company was founded in 2016.</s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581A", "\u2581'", "ll", "\u2581!!", "to", "?'", "d", "''", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [128022, 58, 244, 2279, 9403, 428, 72956, 173, 8471, 173, 432, 4, 3154, 12, 88, 5, 2],
+      decoded: "__en__ A 'll!!to?'d''d of, can't.</s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u2581def", "\u2581main", "(", "):", "\u2581pass"],
+      ids: [128022, 8268, 9359, 249, 2825, 4799, 2],
+      decoded: "__en__ def main(): pass</s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u2581let", "\u2581a", "\u2581=", "\u2581ob", "j", ".", "to", "Str", "ing", "(", ");", "\u2581to", "Str", "ing", "(", ");"],
+      ids: [128022, 2507, 8, 3255, 607, 189, 5, 428, 41549, 150, 249, 5294, 128, 41549, 150, 249, 5294, 2],
+      decoded: "__en__ let a = obj.toString(); toString();</s>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["\u2581This", "\u2581is", "\u2581a", "\u2581test", "."],
+      ids: [128022, 36606, 117, 8, 4183, 5, 2],
+      decoded: "__en__ This is a test.</s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u2581UN", "want", "\u00e9d", ",", "run", "ning"],
+      ids: [128022, 6984, 108054, 7151, 4, 18634, 656, 2],
+      decoded: "__en__ UNwant\u00e9d,running</s>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["\u25811", "\u0000", "2", "\u25813"],
+      ids: [128022, 161, 4163, 339, 205, 2],
+      decoded: "__en__ 1\u00002 3</s>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["\u2581Hello", "\u2581World"],
+      ids: [128022, 65761, 10581, 2],
+      decoded: "__en__ Hello World</s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["\u2581hello", "\u2581world"],
+      ids: [128022, 110013, 55185, 2],
+      decoded: "__en__ hello world</s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581", "\u751f\u6d3b", "\u7684", "\u771f", /* "\u8c1b" */ "<unk>", "\u662f"],
+      ids: [128022, 22, 8523, 80, 10418, 3, 775, 2],
+      decoded: "__en__ \u751f\u6d3b\u7684\u771f<unk>\u662f</s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581leading", "\u2581space"],
+      ids: [128022, 124476, 118561, 2],
+      decoded: "__en__ leading space</s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581tra", "iling", "\u2581space"],
+      ids: [128022, 1368, 19217, 118561, 2],
+      decoded: "__en__ trailing space</s>",
+    },
+    SURROUNDING_SPACE: {
+      text: BASE_TEST_STRINGS.SURROUNDING_SPACE,
+      tokens: ["\u2581surround", "ing", "\u2581space"],
+      ids: [128022, 124728, 150, 118561, 2],
+      decoded: "__en__ surrounding space</s>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["\u2581Hi", "\u2581Hello"],
+      ids: [128022, 7676, 65761, 2],
+      decoded: "__en__ Hi Hello</s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$", "1", "\u2581R", "2", "\u2581#", "3", "\u2581\u20ac", "4", "\u2581\u00a3", "5", "\u2581", "\u00a5", "6", "\u2581", /* "\u20a3" */ "<unk>", "7", "\u2581", "\u20b9", "8", "\u2581", /* "\u20b1" */ "<unk>", "9", "\u2581test"],
+      ids: [128022, 4183, 4352, 451, 180, 339, 584, 425, 4257, 465, 13506, 679, 22, 43832, 435, 22, 3, 622, 22, 115056, 677, 22, 3, 718, 4183, 2],
+      decoded: "__en__ test $1 R2 #3 \u20ac4 \u00a35 \u00a56 <unk>7 \u20b98 <unk>9 test</s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581I", "\u2581bought", "\u2581an", "\u2581ap", "ple", "\u2581for", "\u2581$", "1.", "00", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [128022, 203, 127797, 48, 722, 6857, 193, 4352, 2023, 1365, 120, 1197, 9160, 5, 2],
+      decoded: "__en__ I bought an apple for $1.00 at the store.</s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "..."],
+      ids: [128022, 8251, 26, 2],
+      decoded: "__en__ you...</s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "..."],
+      ids: [128022, 8251, 26, 2],
+      decoded: "__en__ you...</s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "...", "\u2581you", "..."],
+      ids: [128022, 8251, 26, 8251, 26, 2],
+      decoded: "__en__ you... you...</s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581we", "ird", "\u2581", "\uff5e", "\u2581ed", "ge", "\u2581", "\uff5e", "\u2581case"],
+      ids: [128022, 1710, 13067, 22, 14691, 1500, 568, 22, 14691, 24306, 2],
+      decoded: "__en__ weird \uff5e edge \uff5e case</s>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581This", "\u2581is", "\u2581a", "\u2581test", "\u2581."],
+      ids: [128022, 36606, 117, 8, 4183, 237, 2],
+      decoded: "__en__ This is a test.</s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581", "\ud83d\ude02", "\u2581", "\ud83d\udc4d", "\u2581", "\ud83e\udd23", "\u2581", "\ud83d\ude0d", "\u2581", "\ud83d\ude2d", "\u2581", "\ud83c\udf89", "\u2581", "\ud83d\ude4f", "\u2581", "\ud83d\ude0a", "\u2581", "\ud83d\udd25", "\u2581", "\ud83d\ude01", "\u2581", "\ud83d\ude05", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\ude06", "\u2581", "\ud83d\udc4f", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc9c", "\u2581", "\ud83d\udc9a", "\u2581", "\ud83d\udc97", "\u2581", "\ud83d\udc99", "\u2581", "\ud83d\udda4", "\u2581", "\ud83d\ude0e", "\u2581", "\ud83d\udc4c", "\u2581", /* "\ud83e\udd73" */ "<unk>", "\u2581", "\ud83d\udcaa", "\u2581", "\u2728", "\u2581", "\ud83d\udc49", "\u2581", "\ud83d\udc40", "\u2581", "\ud83d\udcaf", "\u2581", "\ud83c\udf88", "\u2581", "\ud83d\ude48", "\u2581", "\ud83d\ude4c", "\u2581", /* "\ud83d\udc80" */ "<unk>", "\u2581", "\ud83d\udc47", "\u2581", "\ud83d\udc4b", "\u2581", "\u2705", "\u2581", "\ud83c\udf81", "\u2581", /* "\ud83c\udf1e" */ "<unk>", "\u2581", "\ud83c\udf38", "\u2581", "\ud83d\udcb0"],
+      ids: [128022, 22, 74222, 22, 118514, 22, 124385, 22, 99683, 22, 123842, 22, 124821, 22, 117689, 22, 103111, 22, 121924, 22, 121088, 22, 124207, 22, 123955, 22, 120137, 22, 123534, 66038, 18905, 22, 125385, 22, 125317, 22, 126071, 22, 124787, 22, 127396, 22, 120119, 22, 122813, 22, 3, 22, 123482, 22, 120563, 22, 117995, 22, 127978, 22, 126507, 22, 127269, 22, 126179, 22, 125300, 22, 3, 22, 120807, 22, 127143, 22, 118682, 22, 125350, 22, 3, 22, 123790, 22, 126948, 2],
+      decoded: /* "__en__ \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c <unk>\ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c <unk>\ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 <unk>\ud83c\udf38 \ud83d\udcb0</s>" */ "__en__ \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c <unk> \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c <unk> \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 <unk> \ud83c\udf38 \ud83d\udcb0</s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "\u2728", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\udc41", "\ufe0f", "\u2581", /* "\ud83d\udc71" */ "<unk>", "\ud83c\udffb", "\u2581", /* "\ud83d\udd75" */ "<unk>", "\u2581", "\u2642", "\ufe0f", "\u2581", /* "\ud83e\uddd9" */ "<unk>", "\ud83c\udffb", "\u2581", "\u2642", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581", /* "\ud83c\udf3e" */ "<unk>", "\u2581", /* "\ud83e\uddd1" */ "<unk>", "\u2581", /* "\ud83e\udd1d" */ "<unk>", "\u2581", /* "\ud83e\uddd1" */ "<unk>", "\u2581", "\ud83d\udc69", "\u2581\u2764", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc69", "\u2581", /* "\ud83d\udc67" */ "<unk>", "\u2581", /* "\ud83d\udc66" */ "<unk>", "\u2581", /* "\ud83e\uddd1" */ "<unk>", "\ud83c\udffb", "\u2581", /* "\ud83e\udd1d" */ "<unk>", "\u2581", /* "\ud83e\uddd1" */ "<unk>", "\ud83c\udffb", "\u2581", /* "\ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f"*/ "<unk>", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\ud83c\udffc"],
+      ids: [128022, 22, 120563, 22, 123955, 22, 121442, 18905, 22, 3, 116617, 22, 3, 22, 122517, 18905, 22, 3, 116617, 22, 122517, 22, 127603, 116617, 22, 3, 22, 3, 22, 3, 22, 3, 22, 126739, 66038, 22, 126237, 22, 127603, 22, 126739, 22, 126739, 22, 3, 22, 3, 22, 3, 116617, 22, 3, 22, 3, 116617, 22, 3, 22, 127603, 116617, 66038, 18905, 22, 126237, 22, 127603, 123285, 2],
+      decoded: /* "__en__ \u2728 \ud83e\udd17 \ud83d\udc41\ufe0f <unk>\ud83c\udffb <unk>\u2642\ufe0f <unk>\ud83c\udffb \u2642 \ud83d\udc68\ud83c\udffb <unk><unk><unk><unk>\ud83d\udc69 \u2764 \ud83d\udc8b \ud83d\udc68 \ud83d\udc69 \ud83d\udc69 <unk><unk><unk>\ud83c\udffb <unk><unk>\ud83c\udffb <unk>\ud83d\udc68\ud83c\udffb \u2764\ufe0f \ud83d\udc8b \ud83d\udc68\ud83c\udffc</s>" */ "__en__ \u2728 \ud83e\udd17 \ud83d\udc41\ufe0f <unk>\ud83c\udffb <unk> \u2642\ufe0f <unk>\ud83c\udffb \u2642 \ud83d\udc68\ud83c\udffb <unk> <unk> <unk> <unk> \ud83d\udc69 \u2764 \ud83d\udc8b \ud83d\udc68 \ud83d\udc69 \ud83d\udc69 <unk> <unk> <unk>\ud83c\udffb <unk> <unk>\ud83c\udffb <unk> \ud83d\udc68\ud83c\udffb \u2764\ufe0f \ud83d\udc8b \ud83d\udc68\ud83c\udffc</s>",
+    },
+    ONLY_WHITESPACE: {
+      text: BASE_TEST_STRINGS.ONLY_WHITESPACE,
+      tokens: [],
+      ids: [128022, 2],
+      decoded: /* "__en__ </s>" */ "__en__</s>",
+    },
+    TRANSLATION_INPUTS: {
+      text: M2M_100_TEST_STRINGS.TRANSLATION_INPUTS,
+      tokens: ["__en__", "\u2581hello", "\u2581world", "</s>"],
+      ids: [128022, 128022, 110013, 55185, 2, 2],
+      decoded: /* "__en__ __en__ hello world</s></s>" */ "__en____en__ hello world</s></s>",
+    },
+  },
+};
diff --git a/tests/models/mpnet/tokenization.js b/tests/models/mpnet/tokenization.js
new file mode 100644
index 000000000..13f688d73
--- /dev/null
+++ b/tests/models/mpnet/tokenization.js
@@ -0,0 +1,158 @@
+import { MPNetTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = MPNetTokenizer;
+export const TEST_CONFIG = {
+  "Xenova/all-mpnet-base-v2": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["how", "are", "you", "doing", "?"],
+      ids: [0, 2133, 2028, 2021, 2729, 1033, 2],
+      decoded: "<s> how are you doing? </s>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["you", "should", "'", "ve", "done", "this"],
+      ids: [0, 2021, 2327, 1009, 2314, 2593, 2027, 2],
+      decoded: "<s> you should've done this </s>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["01", "##23", "##45", "##6", "##7", "##8", "##9", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "100", "1000"],
+      ids: [0, 5894, 21930, 19965, 2579, 2585, 2624, 2687, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 2188, 2535, 6698, 2],
+      decoded: "<s> 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000 </s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["the", "company", "was", "founded", "in", "2016", "."],
+      ids: [0, 2000, 2198, 2005, 2635, 2003, 2359, 1016, 2],
+      decoded: "<s> the company was founded in 2016. </s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["a", "'", "ll", "!", "!", "to", "?", "'", "d", "'", "'", "d", "of", ",", "can", "'", "t", "."],
+      ids: [0, 1041, 1009, 2226, 1003, 1003, 2004, 1033, 1009, 1044, 1009, 1009, 1044, 2001, 1014, 2068, 1009, 1060, 1016, 2],
+      decoded: "<s> a'll!! to?'d'' d of, can't. </s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "main", "(", ")", ":", "pass"],
+      ids: [0, 13370, 2368, 1010, 1011, 1028, 3417, 2],
+      decoded: "<s> def main ( ) : pass </s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "ob", "##j", ".", "to", "##st", "##ring", "(", ")", ";", "to", "##st", "##ring", "(", ")", ";"],
+      ids: [0, 2296, 1041, 1031, 27889, 3505, 1016, 2004, 3371, 4896, 1010, 1011, 1029, 2004, 3371, 4896, 1010, 1011, 1029, 2],
+      decoded: "<s> let a = obj. tostring ( ) ; tostring ( ) ; </s>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["this", "is", "a", "test", "."],
+      ids: [0, 2027, 2007, 1041, 3235, 1016, 2],
+      decoded: "<s> this is a test. </s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["unwanted", ",", "running"],
+      ids: [0, 18166, 1014, 2774, 2],
+      decoded: "<s> unwanted, running </s>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["123"],
+      ids: [0, 13142, 2],
+      decoded: "<s> 123 </s>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["hello", "world"],
+      ids: [0, 7596, 2092, 2],
+      decoded: "<s> hello world </s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "world"],
+      ids: [0, 7596, 2092, 2],
+      decoded: "<s> hello world </s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "[UNK]", "\u7684", "\u771f", "[UNK]", "[UNK]"],
+      ids: [0, 1914, 104, 1920, 1925, 104, 104, 2],
+      decoded: "<s> \u751f [UNK] \u7684 \u771f [UNK] [UNK] </s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["leading", "space"],
+      ids: [0, 2881, 2690, 2],
+      decoded: "<s> leading space </s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trailing", "space"],
+      ids: [0, 12546, 2690, 2],
+      decoded: "<s> trailing space </s>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["hi", "hello"],
+      ids: [0, 7636, 7596, 2],
+      decoded: "<s> hi hello </s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r", "##2", "#", "3", "\u20ac", "##4", "\u00a35", "\u00a5", "##6", "[UNK]", "\u20b9", "##8", "\u20b1", "##9", "test"],
+      ids: [0, 3235, 1006, 1019, 1058, 2479, 1005, 1021, 1578, 2553, 27817, 1075, 2579, 104, 1580, 2624, 1579, 2687, 3235, 2],
+      decoded: "<s> test $ 1 r2 # 3 \u20ac4 \u00a35 \u00a56 [UNK] \u20b98 \u20b19 test </s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["i", "bought", "an", "apple", "for", "$", "1", ".", "00", "at", "the", "store", "."],
+      ids: [0, 1049, 4153, 2023, 6211, 2009, 1006, 1019, 1016, 4006, 2016, 2000, 3577, 1016, 2],
+      decoded: "<s> i bought an apple for $ 1. 00 at the store. </s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u2026"],
+      ids: [0, 2021, 1533, 2],
+      decoded: "<s> you \u2026 </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u2026"],
+      ids: [0, 2021, 1533, 2],
+      decoded: "<s> you \u2026 </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u2026", "you", "\u2026"],
+      ids: [0, 2021, 1533, 2021, 1533, 2],
+      decoded: "<s> you \u2026 you \u2026 </s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["weird", "\uff5e", "edge", "\uff5e", "case"],
+      ids: [0, 6885, 1999, 3345, 1999, 2557, 2],
+      decoded: "<s> weird \uff5e edge \uff5e case </s>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "."],
+      ids: [0, 104, 104, 104, 104, 104, 1016, 2],
+      decoded: "<s> [UNK] [UNK] [UNK] [UNK] [UNK]. </s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [0, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 2],
+      decoded: "<s> [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] </s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]", "[UNK]"],
+      ids: [0, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, 2],
+      decoded: "<s> [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] [UNK] </s>",
+    },
+  },
+};
diff --git a/tests/models/nllb/tokenization.js b/tests/models/nllb/tokenization.js
new file mode 100644
index 000000000..4cfc6592f
--- /dev/null
+++ b/tests/models/nllb/tokenization.js
@@ -0,0 +1,158 @@
+import { NllbTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = NllbTokenizer;
+export const TEST_CONFIG = {
+  "Xenova/nllb-200-distilled-600M": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["\u2581How", "\u2581are", "\u2581you", "\u2581doing", "?"],
+      ids: [256047, 13374, 2442, 1259, 34512, 248130, 2],
+      decoded: "eng_Latn How are you doing?</s>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["\u2581You", "\u2581should", "'", "ve", "\u2581done", "\u2581this"],
+      ids: [256047, 3555, 12516, 248116, 279, 27236, 3423, 2],
+      decoded: "eng_Latn You should've done this</s>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["\u25810", "123", "45", "67", "89", "\u25810", "\u25811", "\u25812", "\u25813", "\u25814", "\u25815", "\u25816", "\u25817", "\u25818", "\u25819", "\u258110", "\u2581100", "\u25811000"],
+      ids: [256047, 4097, 232903, 25497, 37462, 42763, 4097, 94, 140, 315, 436, 481, 617, 757, 799, 855, 772, 3037, 18041, 2],
+      decoded: "eng_Latn 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000</s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u2581The", "\u2581company", "\u2581was", "\u2581found", "ed", "\u2581in", "\u25812016."],
+      ids: [256047, 1617, 32796, 1398, 26710, 76, 108, 31889, 2],
+      decoded: "eng_Latn The company was founded in 2016.</s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581A", "\u2581'", "ll", "\u2581!!", "to", "?'", "d", "'", "'", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [256047, 70, 238, 1015, 12434, 208, 7358, 248072, 248116, 248116, 248072, 452, 248079, 2125, 248116, 248065, 248075, 2],
+      decoded: "eng_Latn A 'll!!to?'d''d of, can't.</s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u2581def", "\u2581main", "(", "):", "\u2581pass"],
+      ids: [256047, 9274, 8385, 248168, 9481, 5800, 2],
+      decoded: "eng_Latn def main(): pass</s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u2581let", "\u2581a", "\u2581=", "\u2581ob", "j", ".", "to", "Str", "ing", "(", ");", "\u2581to", "Str", "ing", "(", ");"],
+      ids: [256047, 3190, 9, 5636, 859, 248086, 248075, 208, 134293, 87, 248168, 12387, 202, 134293, 87, 248168, 12387, 2],
+      decoded: "eng_Latn let a = obj.toString(); toString();</s>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["\u2581This", "\u2581is", "\u2581a", "\u2581test", "."],
+      ids: [256047, 9680, 248, 9, 7356, 248075, 2],
+      decoded: "eng_Latn This is a test.</s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u2581UN", "want", "\u00e9d", ",", "run", "ning"],
+      ids: [256047, 16297, 41691, 11317, 248079, 8464, 888, 2],
+      decoded: "eng_Latn UNwant\u00e9d,running</s>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["\u25811", "<unk>", "2", "\u25813"],
+      ids: [256047, 94, 3, 248147, 315, 2],
+      decoded: "eng_Latn 1<unk>2 3</s>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["\u2581Hello", "\u2581World"],
+      ids: [256047, 94124, 13855, 2],
+      decoded: "eng_Latn Hello World</s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["\u2581hello", "\u2581world"],
+      ids: [256047, 133863, 15697, 2],
+      decoded: "eng_Latn hello world</s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581\u751f\u6d3b", "\u7684", "\u771f", "<unk>", "\u662f"],
+      ids: [256047, 182892, 248506, 249573, 3, 249221, 2],
+      decoded: "eng_Latn \u751f\u6d3b\u7684\u771f<unk>\u662f</s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581leading", "\u2581space"],
+      ids: [256047, 151175, 72147, 2],
+      decoded: "eng_Latn leading space</s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581tra", "iling", "\u2581space", "\u2581"],
+      ids: [256047, 1372, 21263, 72147, 248059, 2],
+      decoded: "eng_Latn trailing space </s>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["\u2581Hi", "\u2581Hello"],
+      ids: [256047, 2867, 94124, 2],
+      decoded: "eng_Latn Hi Hello</s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$1", "\u2581R", "2", "\u2581#3", "\u2581\u20ac", "4", "\u2581\u00a3", "5", "\u2581", "\u00a5", "6", "\u2581", "<unk>", "7", "\u2581", "\u20b9", "8", "\u2581", "<unk>", "9", "\u2581test"],
+      ids: [256047, 7356, 68462, 250, 248147, 186447, 22935, 248215, 25400, 248210, 248059, 252351, 248262, 248059, 3, 248283, 248059, 254867, 248268, 248059, 3, 248212, 7356, 2],
+      decoded: "eng_Latn test $1 R2 #3 \u20ac4 \u00a35 \u00a56 <unk>7 \u20b98 <unk>9 test</s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581I", "\u2581bought", "\u2581an", "\u2581apple", "\u2581for", "\u2581$", "1.", "00", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [256047, 117, 177233, 111, 203152, 351, 4589, 3044, 460, 230, 349, 21087, 248075, 2],
+      decoded: "eng_Latn I bought an apple for $1.00 at the store.</s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "...", "\u2581"],
+      ids: [256047, 1259, 284, 248059, 2],
+      decoded: "eng_Latn you... </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "...", "\u2581"],
+      ids: [256047, 1259, 284, 248059, 2],
+      decoded: "eng_Latn you... </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "...", "\u2581you", "...", "\u2581"],
+      ids: [256047, 1259, 284, 1259, 284, 248059, 2],
+      decoded: "eng_Latn you... you... </s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581weird", "\u2581", "<unk>", "\u2581ed", "ge", "\u2581", "<unk>", "\u2581case"],
+      ids: [256047, 197348, 248059, 3, 1074, 479, 248059, 3, 23555, 2],
+      decoded: "eng_Latn weird <unk> edge <unk> case</s>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581This", "\u2581is", "\u2581a", "\u2581test", "\u2581."],
+      ids: [256047, 9680, 248, 9, 7356, 81, 2],
+      decoded: "eng_Latn This is a test.</s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581\ud83d\ude02", "\u2581", "\ud83d\udc4d", "\u2581", "\ud83e\udd23", "\u2581\ud83d\ude0d", "\u2581", "\ud83d\ude2d", "\u2581", "<unk>", "\u2581", "\ud83d\ude4f", "\u2581", "\ud83d\ude0a", "\u2581", "\ud83d\udd25", "\u2581", "\ud83d\ude01", "\u2581", "\ud83d\ude05", "\u2581", "\ud83e\udd17", "\u2581\ud83d\ude06", "\u2581", "\ud83d\udc4f", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc9c", "\u2581", "\ud83d\udc9a", "\u2581", "<unk>", "\u2581", "\ud83d\udc99", "\u2581", "<unk>", "\u2581", "\ud83d\ude0e", "\u2581", "\ud83d\udc4c", "\u2581", "<unk>", "\u2581", "\ud83d\udcaa", "\u2581", "\u2728", "\u2581", "\ud83d\udc49", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "\ud83d\ude48", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "\ud83d\udc47", "\u2581", "<unk>", "\u2581", "\u2705", "\u2581", "\ud83c\udf81", "\u2581", "<unk>", "\u2581", "\ud83c\udf38", "\u2581", "<unk>"],
+      ids: [256047, 104709, 248059, 253416, 248059, 253516, 241830, 248059, 253476, 248059, 3, 248059, 253443, 248059, 253515, 248059, 254402, 248059, 253288, 248059, 253776, 248059, 255232, 147677, 248059, 255420, 82495, 251759, 248059, 255742, 248059, 255949, 248059, 3, 248059, 255649, 248059, 3, 248059, 254297, 248059, 254723, 248059, 3, 248059, 255515, 248059, 254957, 248059, 253985, 248059, 3, 248059, 3, 248059, 3, 248059, 255855, 248059, 3, 248059, 3, 248059, 255354, 248059, 3, 248059, 254268, 248059, 255879, 248059, 3, 248059, 255952, 248059, 3, 2],
+      decoded: "eng_Latn \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d <unk> \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a <unk> \ud83d\udc99 <unk> \ud83d\ude0e \ud83d\udc4c <unk> \ud83d\udcaa \u2728 \ud83d\udc49 <unk> <unk> <unk> \ud83d\ude48 <unk> <unk> \ud83d\udc47 <unk> \u2705 \ud83c\udf81 <unk> \ud83c\udf38 <unk></s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "\u2728", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\udc41", "\ufe0f", "\u2581", "<unk>", "\ud83c\udffb", "\u2581", "<unk>", "\u2581", "\u2642", "\ufe0f", "\u2581", "<unk>", "\ud83c\udffb", "\u2581", "\u2642", "\u2581", "<unk>", "\ud83c\udffb", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581\u2764", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\u2581", "<unk>", "\ud83c\udffb", "\u2581", "<unk>", "\u2581", "<unk>", "\ud83c\udffb", "\u2581", "<unk>", "\u2581", "<unk>", "\ud83c\udffb", "\u2581\u2764", "\ufe0f", "\u2581", "<unk>", "\u2581", "<unk>", "\ud83c\udffc"],
+      ids: [256047, 248059, 254957, 248059, 255232, 248059, 255123, 251759, 248059, 3, 254422, 248059, 3, 248059, 255331, 251759, 248059, 3, 254422, 248059, 255331, 248059, 3, 254422, 248059, 3, 248059, 3, 248059, 3, 248059, 3, 248059, 3, 82495, 248059, 3, 248059, 3, 248059, 3, 248059, 3, 248059, 3, 248059, 3, 248059, 3, 254422, 248059, 3, 248059, 3, 254422, 248059, 3, 248059, 3, 254422, 82495, 251759, 248059, 3, 248059, 3, 255832, 2],
+      decoded: "eng_Latn \u2728 \ud83e\udd17 \ud83d\udc41\ufe0f <unk>\ud83c\udffb <unk> \u2642\ufe0f <unk>\ud83c\udffb \u2642 <unk>\ud83c\udffb <unk> <unk> <unk> <unk> <unk> \u2764 <unk> <unk> <unk> <unk> <unk> <unk> <unk>\ud83c\udffb <unk> <unk>\ud83c\udffb <unk> <unk>\ud83c\udffb \u2764\ufe0f <unk> <unk>\ud83c\udffc</s>",
+    },
+  },
+};
diff --git a/tests/models/qwen2/tokenization.js b/tests/models/qwen2/tokenization.js
new file mode 100644
index 000000000..4bcdeaeed
--- /dev/null
+++ b/tests/models/qwen2/tokenization.js
@@ -0,0 +1,158 @@
+import { Qwen2Tokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = Qwen2Tokenizer;
+export const TEST_CONFIG = {
+  "Xenova/Qwen1.5-0.5B-Chat": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u0120are", "\u0120you", "\u0120doing", "?"],
+      ids: [4340, 525, 498, 3730, 30],
+      decoded: "How are you doing?",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120should", "'ve", "\u0120done", "\u0120this"],
+      ids: [2610, 1265, 3003, 2814, 419],
+      decoded: "You should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "\u0120", "0", "\u0120", "1", "\u0120", "2", "\u0120", "3", "\u0120", "4", "\u0120", "5", "\u0120", "6", "\u0120", "7", "\u0120", "8", "\u0120", "9", "\u0120", "1", "0", "\u0120", "1", "0", "0", "\u0120", "1", "0", "0", "0"],
+      ids: [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 220, 15, 220, 16, 220, 17, 220, 18, 220, 19, 220, 20, 220, 21, 220, 22, 220, 23, 220, 24, 220, 16, 15, 220, 16, 15, 15, 220, 16, 15, 15, 15],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u0120", "2", "0", "1", "6", "."],
+      ids: [785, 2813, 572, 18047, 304, 220, 17, 15, 16, 21, 13],
+      decoded: "The company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [32, 198, 3278, 11015, 983, 20224, 67, 4605, 67, 315, 11, 646, 944, 13],
+      decoded: "A\n'll !!to?'d''d of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "():\u010a", "\u0109pass"],
+      ids: [750, 1887, 3932, 41431],
+      decoded: "def main():\n\tpass",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".toString", "();\u010a", "toString", "();"],
+      ids: [1149, 264, 284, 2839, 5070, 543, 6575, 2129],
+      decoded: "let a = obj.toString();\ntoString();",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["This", "\u010a\u010a", "is", "\u010a", "a", "\u010a", "test", "."],
+      ids: [1986, 271, 285, 198, 64, 198, 1944, 13],
+      decoded: "This\n\nis\na\ntest.",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9d", ",", "running"],
+      ids: [1861, 52657, 15083, 11, 27173],
+      decoded: "UNwant\u00e9d,running",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0100", "2", "\u00ef\u00bf\u00bd", "3"],
+      ids: [16, 188, 17, 5691, 18],
+      decoded: "1\u00002\ufffd3",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "\u0120World"],
+      ids: [9707, 4337],
+      decoded: "Hello World",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "\u0120world"],
+      ids: [14990, 1879],
+      decoded: "hello world",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141\u00e6\u00b4\u00bb\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0\u013d", "\u00e6\u013a\u00af"],
+      ids: [105301, 88051, 116109, 20412],
+      decoded: "\u751f\u6d3b\u7684\u771f\u8c1b\u662f",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120\u0120", "\u0120leading", "\u0120space"],
+      ids: [256, 6388, 3550],
+      decoded: "   leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tr", "ailing", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [376, 14277, 3550, 262],
+      decoded: "trailing space   ",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u0120", "\u0120Hello"],
+      ids: [13048, 220, 21927],
+      decoded: "Hi  Hello",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2\u0124\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [1944, 400, 16, 431, 17, 671, 18, 12984, 19, 6938, 20, 71488, 21, 2858, 224, 96, 22, 89791, 23, 2858, 224, 109, 24, 1273],
+      decoded: "test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "0", "0", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [40, 10788, 458, 23268, 369, 400, 16, 13, 15, 15, 518, 279, 3553, 13],
+      decoded: "I bought an apple for $1.00 at the store.",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120\u0120"],
+      ids: [9330, 1940, 256],
+      decoded: "you\u2026  ",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [9330, 1940, 9238],
+      decoded: "you\u2026\u00a0\u00a0",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142", "\u00c2\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [9330, 1940, 4102, 4102, 9330, 1940, 9238],
+      decoded: "you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120", "\u00ef\u00bd\u0140", "\u0120edge", "\u0120", "\u00ef\u00bd\u0140", "\u0120case"],
+      ids: [896, 2603, 220, 21216, 6821, 220, 21216, 1142],
+      decoded: "weird \uff5e edge \uff5e case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120\u00e2\u0138", "\u0123", "is", "\u0120\u00e2\u0138", "\u0123", "a", "\u0120\u00e2\u0138", "\u0123", "test", "\u0120\u00e2\u0138", "\u0123", "."],
+      ids: [10417, 223, 1986, 14520, 223, 285, 14520, 223, 64, 14520, 223, 1944, 14520, 223, 13],
+      decoded: "\u2581This \u2581is \u2581a \u2581test \u2581.",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u00f0\u0141\u013a\u0124", "\u0120\u00f0\u0141\u0133", "\u012f", "\u0120\u00f0\u0141", "\u00a4", "\u00a3", "\u0120\u00f0\u0141\u013a", "\u012f", "\u0120\u00f0\u0141\u013a", "\u0143", "\u0120\u00f0\u0141", "\u0130", "\u012b", "\u0120\u00f0\u0141", "\u013b", "\u0131", "\u0120\u00f0\u0141\u013a", "\u012c", "\u0120\u00f0\u0141\u0136", "\u00a5", "\u0120\u00f0\u0141\u013a", "\u0123", "\u0120\u00f0\u0141\u013a", "\u0127", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u013a", "\u0128", "\u0120\u00f0\u0141\u0133", "\u0131", "\u0120\u00e2\u013f\u00a4", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141\u0134", "\u013e", "\u0120\u00f0\u0141\u0134", "\u013c", "\u0120\u00f0\u0141\u0134", "\u0139", "\u0120\u00f0\u0141\u0134", "\u013b", "\u0120\u00f0\u0141", "\u0138", "\u00a4", "\u0120\u00f0\u0141\u013a", "\u0130", "\u0120\u00f0\u0141\u0133", "\u012e", "\u0120\u00f0\u0141", "\u00a5", "\u00b3", "\u0120\u00f0\u0141\u0134", "\u00aa", "\u0120\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141\u0133", "\u012b", "\u0120\u00f0\u0141\u0133", "\u0122", "\u0120\u00f0\u0141\u0134", "\u00af", "\u0120\u00f0\u0141", "\u0130", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012e", "\u0120\u00f0\u0141\u0134", "\u0122", "\u0120\u00f0\u0141\u0133", "\u0129", "\u0120\u00f0\u0141\u0133", "\u012d", "\u0120\u00e2\u013e", "\u0127", "\u0120\u00f0\u0141", "\u0130", "\u0123", "\u0120\u00f0\u0141", "\u012e", "\u0140", "\u0120\u00f0\u0141", "\u012e", "\u00b8", "\u0120\u00f0\u0141\u0134", "\u00b0"],
+      ids: [144185, 61804, 235, 11162, 97, 96, 26525, 235, 26525, 255, 11162, 236, 231, 11162, 247, 237, 26525, 232, 95069, 98, 26525, 223, 26525, 227, 11162, 97, 245, 26525, 228, 61804, 237, 70470, 30543, 63039, 250, 63039, 248, 63039, 245, 63039, 247, 11162, 244, 97, 26525, 236, 61804, 234, 11162, 98, 111, 63039, 103, 25521, 101, 61804, 231, 61804, 222, 63039, 107, 11162, 236, 230, 11162, 247, 230, 11162, 247, 234, 63039, 222, 61804, 229, 61804, 233, 25521, 227, 11162, 236, 223, 11162, 234, 252, 11162, 234, 116, 63039, 108],
+      decoded: "\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u00e2\u013e\u00a8", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u0133", "\u0123", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141\u0133", "\u00b1", "\u00f0\u0141\u0131\u00bb", "\u0120\u00f0\u0141", "\u0137", "\u00b5", "\u00e2\u0122", "\u012f", "\u00e2\u013b\u0124", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141", "\u00a7", "\u013b", "\u00f0\u0141\u0131\u00bb", "\u00e2\u0122", "\u012f", "\u00e2\u013b\u0124", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141\u0131\u00bb", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u012e\u00be", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u00a4\u013f", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u00a7\u0133", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00e2\u013f\u00a4", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0134\u012d", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0133\u00a8", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0133\u00a9", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0133\u00a7", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0133\u00a6", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141\u0131\u00bb", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u00a4\u013f", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u00a7\u0133", "\u00f0\u0141\u0131\u00bb", "\u0120\u00f0\u0141", "\u0131", "\u00b4", "\u00f3", "\u0142\u0123", "\u00a7", "\u00f3", "\u0142\u0123", "\u00a2", "\u00f3", "\u0142\u0123", "\u00a5", "\u00f3", "\u0142\u0123", "\u00ae", "\u00f3", "\u0142\u0123", "\u00a7", "\u00f3", "\u0142\u0123", "\u00bf", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141\u0131\u00bb", "\u00e2\u0122", "\u012f", "\u00e2\u013f\u00a4", "\u00ef\u00b8\u0131", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0134\u012d", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0133\u00a8", "\u00f0\u0141\u0131\u00bc"],
+      ids: [144232, 11162, 97, 245, 61804, 223, 30543, 61804, 109, 144321, 11162, 243, 113, 378, 235, 144693, 30543, 11162, 100, 247, 144321, 378, 235, 144693, 61804, 101, 144321, 378, 235, 146467, 11162, 100, 239, 378, 235, 146392, 378, 235, 148738, 61804, 102, 378, 235, 141390, 378, 235, 145002, 378, 235, 145367, 61804, 102, 378, 235, 145233, 378, 235, 145665, 378, 235, 145988, 11162, 100, 239, 144321, 378, 235, 146392, 378, 235, 148738, 144321, 11162, 237, 112, 175, 15675, 100, 175, 15675, 95, 175, 15675, 98, 175, 15675, 106, 175, 15675, 100, 175, 15675, 123, 61804, 101, 144321, 378, 235, 141390, 30543, 378, 235, 145002, 378, 235, 145367, 144784],
+      decoded: "\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc",
+    },
+  },
+};
diff --git a/tests/models/roberta/tokenization.js b/tests/models/roberta/tokenization.js
new file mode 100644
index 000000000..05030999b
--- /dev/null
+++ b/tests/models/roberta/tokenization.js
@@ -0,0 +1,694 @@
+import { RobertaTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, BERT_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = RobertaTokenizer;
+export const TEST_CONFIG = {
+  "jinaai/jina-embeddings-v2-base-de": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u0120are", "\u0120you", "\u0120doing", "?"],
+      ids: [0, 3267, 459, 426, 3174, 35, 2],
+      decoded: "<s>How are you doing?</s>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120should", "'ve", "\u0120done", "\u0120this"],
+      ids: [0, 2606, 1303, 1990, 3022, 555, 2],
+      decoded: "<s>You should've done this</s>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "123", "456", "789", "\u01200", "\u01201", "\u01202", "\u01203", "\u01204", "\u01205", "\u01206", "\u01207", "\u01208", "\u01209", "\u012010", "\u0120100", "\u01201000"],
+      ids: [0, 20, 21911, 40271, 51355, 885, 387, 381, 589, 699, 703, 866, 964, 991, 1045, 949, 1873, 8611, 2],
+      decoded: "<s>0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000</s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u01202016", "."],
+      ids: [0, 710, 1891, 503, 15604, 295, 2262, 18, 2],
+      decoded: "<s>The company was founded in 2016.</s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [0, 37, 203, 2202, 26143, 764, 30080, 72, 12228, 72, 314, 16, 571, 797, 18, 2],
+      decoded: "<s>A\n'll!!to?'d''d of, can't.</s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "(", "):", "\u010a", "\u0109", "pass"],
+      ids: [0, 28273, 1911, 12, 4025, 203, 202, 5517, 2],
+      decoded: "<s>def main():\n\tpass</s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".", "to", "String", "();", "\u010a", "to", "String", "();"],
+      ids: [0, 1642, 264, 3887, 8273, 18, 764, 53889, 54181, 203, 764, 53889, 54181, 2],
+      decoded: "<s>let a = obj.toString();\ntoString();</s>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["This", "\u010a", "\u010a", "is", "\u010a", "a", "\u010a", "test", "."],
+      ids: [0, 1803, 203, 203, 276, 203, 69, 203, 4451, 18, 2],
+      decoded: "<s>This\n\nis\na\ntest.</s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "w", "ant", "\u00c3\u00a9d", ",", "running"],
+      ids: [0, 3854, 91, 526, 46298, 16, 47232, 2],
+      decoded: "<s>UNwant\u00e9d,running</s>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0100", "2", "\u00ef\u00bf\u00bd", "3"],
+      ids: [0, 21, 193, 22, 1998, 23, 2],
+      decoded: "<s>1\u00002\ufffd3</s>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "\u0120World"],
+      ids: [0, 17964, 3519, 2],
+      decoded: "<s>Hello World</s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hell", "o", "\u0120world"],
+      ids: [0, 17067, 83, 1568, 2],
+      decoded: "<s>hello world</s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136", "\u0141", "\u00e6", "\u00b4", "\u00bb", "\u00e7\u013c\u0126", "\u00e7", "\u013e", "\u0141", "\u00e8", "\u00b0", "\u013d", "\u00e6", "\u013a", "\u00af"],
+      ids: [0, 55225, 258, 167, 117, 124, 44574, 168, 255, 258, 169, 113, 254, 167, 251, 112, 2],
+      decoded: "<s>\u751f\u6d3b\u7684\u771f\u8c1b\u662f</s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120\u0120", "\u0120leading", "\u0120space"],
+      ids: [0, 6733, 5344, 3435, 2],
+      decoded: "<s>   leading space</s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "iling", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [0, 766, 7462, 3435, 53448, 2],
+      decoded: "<s>trailing space   </s>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u0120", "\u0120Hello"],
+      ids: [0, 10103, 225, 29546, 2],
+      decoded: "<s>Hi  Hello</s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2", "\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2\u0124\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [0, 4451, 1350, 21, 366, 22, 1805, 23, 2712, 24, 6339, 25, 960, 103, 26, 1581, 229, 101, 27, 58720, 28, 1581, 229, 114, 29, 2839, 2],
+      decoded: "<s>test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test</s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "00", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [0, 45, 8928, 371, 19798, 382, 1350, 21, 18, 505, 495, 285, 4569, 18, 2],
+      decoded: "<s>I bought an apple for $1.00 at the store.</s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120\u0120"],
+      ids: [0, 10695, 1179, 6733, 2],
+      decoded: "<s>you\u2026  </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [0, 10695, 1179, 44105, 2],
+      decoded: "<s>you\u2026\u00a0\u00a0</s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142", "\u00c2\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [0, 10695, 1179, 15529, 15529, 10695, 1179, 44105, 2],
+      decoded: "<s>you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0</s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120edge", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120case"],
+      ids: [0, 487, 2394, 17740, 126, 257, 9911, 17740, 126, 257, 2600, 2],
+      decoded: "<s>weird \uff5e edge \uff5e case</s>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120\u00e2\u0138", "\u0123", "is", "\u0120\u00e2\u0138", "\u0123", "a", "\u0120\u00e2\u0138", "\u0123", "test", "\u0120\u00e2\u0138", "\u0123", "."],
+      ids: [0, 12790, 228, 1803, 20068, 228, 276, 20068, 228, 69, 20068, 228, 4451, 20068, 228, 18, 2],
+      decoded: "<s>\u2581This \u2581is \u2581a \u2581test \u2581.</s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u00f0\u0141\u013a", "\u0124", "\u0120\u00f0\u0141\u0133", "\u012f", "\u0120\u00f0\u0141", "\u00a4", "\u00a3", "\u0120\u00f0\u0141\u013a", "\u012f", "\u0120\u00f0\u0141\u013a", "\u0143", "\u0120\u00f0\u0141", "\u0130", "\u012b", "\u0120\u00f0\u0141\u013b", "\u0131", "\u0120\u00f0\u0141\u013a", "\u012c", "\u0120\u00f0\u0141", "\u0136", "\u00a5", "\u0120\u00f0\u0141\u013a", "\u0123", "\u0120\u00f0\u0141\u013a", "\u0127", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u013a", "\u0128", "\u0120\u00f0\u0141\u0133", "\u0131", "\u0120\u00e2\u013f\u00a4", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141\u0134", "\u013e", "\u0120\u00f0\u0141\u0134", "\u013c", "\u0120\u00f0\u0141\u0134", "\u0139", "\u0120\u00f0\u0141\u0134", "\u013b", "\u0120\u00f0\u0141", "\u0138", "\u00a4", "\u0120\u00f0\u0141\u013a", "\u0130", "\u0120\u00f0\u0141\u0133", "\u012e", "\u0120\u00f0\u0141", "\u00a5", "\u00b3", "\u0120\u00f0\u0141\u0134", "\u00aa", "\u0120\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141\u0133", "\u012b", "\u0120\u00f0\u0141\u0133", "\u0122", "\u0120\u00f0\u0141\u0134", "\u00af", "\u0120\u00f0\u0141", "\u0130", "\u012a", "\u0120\u00f0\u0141\u013b", "\u012a", "\u0120\u00f0\u0141\u013b", "\u012e", "\u0120\u00f0\u0141\u0134", "\u0122", "\u0120\u00f0\u0141\u0133", "\u0129", "\u0120\u00f0\u0141\u0133", "\u012d", "\u0120\u00e2\u013e\u0127", "\u0120\u00f0\u0141", "\u0130", "\u0123", "\u0120\u00f0\u0141", "\u012e", "\u0140", "\u0120\u00f0\u0141", "\u012e", "\u00b8", "\u0120\u00f0\u0141\u0134", "\u00b0"],
+      ids: [0, 32164, 229, 49904, 240, 5060, 102, 101, 10278, 240, 10278, 260, 5060, 241, 236, 10319, 242, 10278, 237, 5060, 247, 103, 10278, 228, 10278, 232, 5060, 102, 250, 10278, 233, 49904, 242, 42009, 16598, 52278, 255, 52278, 253, 52278, 250, 52278, 252, 5060, 249, 102, 10278, 241, 49904, 239, 5060, 103, 116, 52278, 108, 10792, 106, 49904, 236, 49904, 227, 52278, 112, 5060, 241, 235, 10319, 235, 10319, 239, 52278, 227, 49904, 234, 49904, 238, 38607, 5060, 241, 228, 5060, 239, 257, 5060, 239, 121, 52278, 113, 2],
+      decoded: "<s>\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0</s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u0133", "\u0123", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141\u0133", "\u00b1", "\u00f0\u0141\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0137", "\u00b5", "\u00e2\u0122\u012f", "\u00e2\u013b", "\u0124", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141", "\u00a7", "\u013b", "\u00f0\u0141\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00e2\u013b", "\u0124", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u012e", "\u00be", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00e2\u013f\u00a4", "\u00e2\u0122\u012f", "\u00f0\u0141\u0134", "\u012d", "\u00e2\u0122\u012f", "\u00f0\u0141\u0133", "\u00a8", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122\u012f", "\u00f0\u0141\u0133", "\u00a7", "\u00e2\u0122\u012f", "\u00f0\u0141\u0133", "\u00a6", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0131", "\u00b4", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00a2", "\u00f3", "\u0142", "\u0123", "\u00a5", "\u00f3", "\u0142", "\u0123", "\u00ae", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00bf", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141\u0131", "\u00bb", "\u00e2\u0122\u012f", "\u00e2\u013f\u00a4", "\u00ef\u00b8\u0131", "\u00e2\u0122\u012f", "\u00f0\u0141\u0134", "\u012d", "\u00e2\u0122\u012f", "\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141\u0131", "\u00bc"],
+      ids: [0, 20675, 106, 5060, 102, 250, 49904, 228, 16598, 49904, 114, 49365, 124, 5060, 248, 118, 54678, 26323, 229, 16598, 5060, 105, 252, 49365, 124, 54678, 26323, 229, 49904, 106, 49365, 124, 54678, 3753, 239, 127, 5060, 105, 244, 54678, 3753, 102, 256, 54678, 3753, 105, 244, 49904, 107, 54678, 49144, 54678, 41347, 238, 54678, 43307, 106, 49904, 107, 54678, 43307, 107, 54678, 43307, 105, 54678, 43307, 104, 5060, 105, 244, 49365, 124, 54678, 3753, 102, 256, 54678, 3753, 105, 244, 49365, 124, 5060, 242, 117, 180, 259, 228, 105, 180, 259, 228, 100, 180, 259, 228, 103, 180, 259, 228, 111, 180, 259, 228, 105, 180, 259, 228, 128, 49904, 106, 49365, 124, 54678, 49144, 16598, 54678, 41347, 238, 54678, 43307, 106, 49365, 125, 2],
+      decoded: "<s>\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc</s>",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["ah", "\u00e5", "\u012f", "\u013c", "\u00e6", "\u0130", "\u00a8", "zz"],
+      ids: [0, 500, 166, 240, 253, 167, 241, 106, 9326, 2],
+      decoded: "<s>ah\u535a\u63a8zz</s>",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["H", "\u00c3\u00a9", "llo"],
+      ids: [0, 44, 2277, 31053, 2],
+      decoded: "<s>H\u00e9llo</s>",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["\u0120", "\u0109", "He", "L", "Lo", "!", "how", "\u0120\u0120", "\u010a", "\u0120Are", "\u0120y", "o", "U", "?", "\u0120\u0120"],
+      ids: [0, 225, 202, 2523, 48, 17901, 5, 7253, 6733, 203, 5175, 361, 83, 57, 35, 6733, 2],
+      decoded: "<s> \tHeLLo!how  \n Are yoU?  </s>",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["\u0120", "\u0109", "H", "\u00c3\u00a4", "L", "Lo", "!", "how", "\u0120\u0120", "\u010a", "\u0120Are", "\u0120y", "o", "U", "?", "\u0120\u0120"],
+      ids: [0, 225, 202, 44, 325, 48, 17901, 5, 7253, 6733, 203, 5175, 361, 83, 57, 35, 6733, 2],
+      decoded: "<s> \tH\u00e4LLo!how  \n Are yoU?  </s>",
+    },
+
+    TEXT_PAIR: {
+      text: "hello",
+      text_pair: "world",
+      tokens: ["hell", "o", "world"],
+      ids: [0, 17067, 83, 2, 2, 13639, 2],
+      decoded: "<s>hello</s></s>world</s>",
+    },
+  },
+  "jinaai/jina-embeddings-v2-base-code": {
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0123456789", "\u01200", "\u01201", "\u01202", "\u01203", "\u01204", "\u01205", "\u01206", "\u01207", "\u01208", "\u01209", "\u012010", "\u0120100", "\u01201000"],
+      ids: [0, 22133, 325, 397, 491, 795, 879, 997, 1434, 1577, 1240, 1926, 1528, 2069, 5216, 2],
+      decoded: "<s>0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000</s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120f", "ounded", "\u0120in", "\u01202016", "."],
+      ids: [0, 1664, 18100, 2146, 304, 12402, 338, 7541, 18, 2],
+      decoded: "<s>The company was founded in 2016.</s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "():", "\u010a", "\u0109", "pass"],
+      ids: [0, 406, 3578, 3281, 203, 202, 4557, 2],
+      decoded: "<s>def main():\n\tpass</s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".", "toString", "();", "\u010a", "toString", "();"],
+      ids: [0, 953, 323, 278, 2666, 18, 3411, 467, 203, 3411, 467, 2],
+      decoded: "<s>let a = obj.toString();\ntoString();</s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9", "d", ",", "running"],
+      ids: [0, 1129, 13944, 2521, 72, 16, 8423, 2],
+      decoded: "<s>UNwant\u00e9d,running</s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "\u0120world"],
+      ids: [0, 9522, 7550, 2],
+      decoded: "<s>hello world</s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141", "\u00e6\u00b4\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [0, 12173, 28408, 2149, 36264, 12338, 254, 4988, 2],
+      decoded: "<s>\u751f\u6d3b\u7684\u771f\u8c1b\u662f</s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trailing", "\u0120space", "\u0120\u0120\u0120"],
+      ids: [0, 29801, 4113, 264, 2],
+      decoded: "<s>trailing space   </s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2", "\u0124\u00ac", "4", "\u0120\u00c2", "\u00a3", "5", "\u0120\u00c2", "\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2", "\u0124", "\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [0, 1052, 393, 21, 741, 22, 592, 23, 12284, 16181, 24, 5519, 101, 25, 5519, 103, 26, 12284, 229, 101, 27, 12284, 229, 122, 28, 12284, 229, 114, 29, 1089, 2],
+      decoded: "<s>test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test</s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120b", "ought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "00", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [0, 45, 328, 17412, 692, 33091, 455, 393, 21, 18, 337, 913, 329, 3205, 18, 2],
+      decoded: "<s>I bought an apple for $1.00 at the store.</s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120", "\u00ef\u00bd", "\u0140", "\u0120edge", "\u0120", "\u00ef\u00bd", "\u0140", "\u0120case"],
+      ids: [0, 1643, 6005, 225, 44634, 257, 7158, 225, 44634, 257, 1007, 2],
+      decoded: "<s>weird \uff5e edge \uff5e case</s>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120", "\u00e2\u0138", "\u0123", "is", "\u0120", "\u00e2\u0138", "\u0123", "a", "\u0120", "\u00e2\u0138", "\u0123", "test", "\u0120", "\u00e2\u0138", "\u0123", "."],
+      ids: [0, 8550, 228, 2744, 225, 8550, 228, 302, 225, 8550, 228, 69, 225, 8550, 228, 1052, 225, 8550, 228, 18, 2],
+      decoded: "<s>\u2581This \u2581is \u2581a \u2581test \u2581.</s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u00f0\u0141", "\u013a", "\u0124", "\u0120\u00f0\u0141\u0133", "\u012f", "\u0120\u00f0\u0141", "\u00a4", "\u00a3", "\u0120\u00f0\u0141", "\u013a", "\u012f", "\u0120\u00f0\u0141", "\u013a", "\u0143", "\u0120\u00f0\u0141", "\u0130", "\u012b", "\u0120\u00f0\u0141", "\u013b", "\u0131", "\u0120\u00f0\u0141", "\u013a", "\u012c", "\u0120\u00f0\u0141", "\u0136", "\u00a5", "\u0120\u00f0\u0141", "\u013a", "\u0123", "\u0120\u00f0\u0141", "\u013a\u0127", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141", "\u013a", "\u0128", "\u0120\u00f0\u0141\u0133", "\u0131", "\u0120\u00e2", "\u013f", "\u00a4", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141", "\u0134", "\u013e", "\u0120\u00f0\u0141", "\u0134", "\u013c", "\u0120\u00f0\u0141", "\u0134", "\u0139", "\u0120\u00f0\u0141", "\u0134", "\u013b", "\u0120\u00f0\u0141", "\u0138", "\u00a4", "\u0120\u00f0\u0141", "\u013a", "\u0130", "\u0120\u00f0\u0141\u0133", "\u012e", "\u0120\u00f0\u0141", "\u00a5", "\u00b3", "\u0120\u00f0\u0141", "\u0134", "\u00aa", "\u0120\u00e2", "\u013e", "\u00a8", "\u0120\u00f0\u0141\u0133", "\u012b", "\u0120\u00f0\u0141\u0133", "\u0122", "\u0120\u00f0\u0141", "\u0134", "\u00af", "\u0120\u00f0\u0141", "\u0130", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012e", "\u0120\u00f0\u0141", "\u0134", "\u0122", "\u0120\u00f0\u0141\u0133", "\u0129", "\u0120\u00f0\u0141\u0133", "\u012d", "\u0120\u00e2", "\u013e", "\u0127", "\u0120\u00f0\u0141", "\u0130", "\u0123", "\u0120\u00f0\u0141", "\u012e", "\u0140", "\u0120\u00f0\u0141", "\u012e", "\u00b8", "\u0120\u00f0\u0141", "\u0134", "\u00b0"],
+      ids: [0, 8000, 251, 229, 22730, 240, 9919, 102, 101, 9919, 251, 240, 9919, 251, 260, 9919, 241, 236, 9919, 252, 242, 9919, 251, 237, 9919, 247, 103, 9919, 251, 228, 9919, 38879, 9919, 102, 250, 9919, 251, 233, 22730, 242, 12284, 256, 102, 26726, 9919, 245, 255, 9919, 245, 253, 9919, 245, 250, 9919, 245, 252, 9919, 249, 102, 9919, 251, 241, 22730, 239, 9919, 103, 116, 9919, 245, 108, 12284, 255, 106, 22730, 236, 22730, 227, 9919, 245, 112, 9919, 241, 235, 9919, 252, 235, 9919, 252, 239, 9919, 245, 227, 22730, 234, 22730, 238, 12284, 255, 232, 9919, 241, 228, 9919, 239, 257, 9919, 239, 121, 9919, 245, 113, 2],
+      decoded: "<s>\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0</s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u0133", "\u0123", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141\u0133", "\u00b1", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0137", "\u00b5", "\u00e2\u0122", "\u012f", "\u00e2\u013b", "\u0124", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141\u00a7", "\u013b", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00e2\u013b", "\u0124", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u012e", "\u00be", "\u0120\u00f0\u0141\u00a7", "\u0133", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00e2", "\u013f", "\u00a4", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0134", "\u012d", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a7", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a6", "\u0120\u00f0\u0141\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141\u0131", "\u00b4", "\u00f3", "\u0142\u0123", "\u00a7", "\u00f3", "\u0142\u0123", "\u00a2", "\u00f3", "\u0142\u0123", "\u00a5", "\u00f3", "\u0142\u0123", "\u00ae", "\u00f3", "\u0142\u0123", "\u00a7", "\u00f3", "\u0142\u0123", "\u00bf", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00e2", "\u013f", "\u00a4", "\u00ef\u00b8\u0131", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0134", "\u012d", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bc"],
+      ids: [0, 60553, 106, 9919, 102, 250, 22730, 228, 26726, 22730, 114, 8000, 242, 124, 9919, 248, 118, 2965, 240, 30370, 229, 26726, 31249, 252, 8000, 242, 124, 2965, 240, 30370, 229, 22730, 106, 8000, 242, 124, 2965, 240, 8000, 239, 127, 31249, 244, 2965, 240, 8000, 102, 256, 2965, 240, 8000, 105, 244, 22730, 107, 2965, 240, 163, 256, 102, 2965, 240, 8000, 245, 238, 2965, 240, 8000, 244, 106, 22730, 107, 2965, 240, 8000, 244, 107, 2965, 240, 8000, 244, 105, 2965, 240, 8000, 244, 104, 31249, 244, 8000, 242, 124, 2965, 240, 8000, 102, 256, 2965, 240, 8000, 105, 244, 8000, 242, 124, 58646, 117, 180, 9752, 105, 180, 9752, 100, 180, 9752, 103, 180, 9752, 111, 180, 9752, 105, 180, 9752, 128, 22730, 106, 8000, 242, 124, 2965, 240, 163, 256, 102, 26726, 2965, 240, 8000, 245, 238, 2965, 240, 8000, 244, 106, 8000, 242, 125, 2],
+      decoded: "<s>\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc</s>",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["ah", "\u00e5\u012f\u013c", "\u00e6\u0130\u00a8", "zz"],
+      ids: [0, 3885, 33588, 28002, 4881, 2],
+      decoded: "<s>ah\u535a\u63a8zz</s>",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["H", "\u00c3\u00a9", "l", "lo"],
+      ids: [0, 44, 2521, 80, 324, 2],
+      decoded: "<s>H\u00e9llo</s>",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["\u0120", "\u0109", "He", "L", "Lo", "!", "how", "\u0120\u0120\u010a", "\u0120Are", "\u0120y", "o", "U", "?", "\u0120\u0120"],
+      ids: [0, 225, 202, 1397, 48, 1898, 5, 11452, 11092, 14877, 711, 83, 57, 35, 261, 2],
+      decoded: "<s> \tHeLLo!how  \n Are yoU?  </s>",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["\u0120", "\u0109", "H", "\u00c3\u00a4", "L", "Lo", "!", "how", "\u0120\u0120\u010a", "\u0120Are", "\u0120y", "o", "U", "?", "\u0120\u0120"],
+      ids: [0, 225, 202, 44, 4319, 48, 1898, 5, 11452, 11092, 14877, 711, 83, 57, 35, 261, 2],
+      decoded: "<s> \tH\u00e4LLo!how  \n Are yoU?  </s>",
+    },
+  },
+  "jinaai/jina-reranker-v1-tiny-en": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["how", "are", "you", "doing", "?"],
+      ids: [0, 21431, 21182, 21166, 22540, 61, 2],
+      decoded: "<s> how are you doing? </s>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["you", "should", "'", "ve", "done", "this"],
+      ids: [0, 21166, 21602, 37, 21165, 22366, 21225, 2],
+      decoded: "<s> you should've done this </s>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "123", "456", "78", "9", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "100", "1000"],
+      ids: [0, 46, 32683, 56678, 25106, 55, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 21449, 22057, 25148, 2],
+      decoded: "<s> 0 123 456 78 9 0 1 2 3 4 5 6 7 8 9 10 100 1000 </s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["the", "company", "was", "founded", "in", "2016", "."],
+      ids: [0, 21138, 21781, 21257, 26707, 21135, 22672, 44, 2],
+      decoded: "<s> the company was founded in 2016. </s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["a", "'", "ll", "!!", "to", "?'", "d", "''", "d", "of", ",", "can", "'", "t", "."],
+      ids: [0, 69, 37, 21264, 22236, 21148, 58125, 72, 31803, 72, 21155, 42, 21243, 37, 88, 44, 2],
+      decoded: "<s> a'll!! to?' d '' d of, can't. </s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "main", "(", "):", "pass"],
+      ids: [0, 22747, 21810, 38, 25247, 21924, 2],
+      decoded: "<s> def main ( ): pass </s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "a", "=", "ob", "j", ".", "to", "string", "(", ");", "to", "string", "(", ");"],
+      ids: [0, 21621, 69, 59, 21706, 78, 44, 21148, 26762, 38, 25750, 21148, 26762, 38, 25750, 2],
+      decoded: "<s> let a = ob j. to string ( ); to string ( ); </s>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["this", "is", "a", "test", "."],
+      ids: [0, 21225, 21152, 69, 21828, 44, 2],
+      decoded: "<s> this is a test. </s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["un", "want", "\u00e9", "d", ",", "running"],
+      ids: [0, 21193, 21569, 173, 72, 42, 23225, 2],
+      decoded: "<s> un want \u00e9 d, running </s>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0000", "2", "\ufffd", "3"],
+      ids: [0, 47, 5, 48, 20321, 49, 2],
+      decoded: "<s> 1 \u0000 2 \ufffd 3 </s>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["hello", "world"],
+      ids: [0, 28687, 21628, 2],
+      decoded: "<s> hello world </s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "world"],
+      ids: [0, 28687, 21628, 2],
+      decoded: "<s> hello world </s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f\u6d3b\u7684", "\u771f", "\u8c1b", "\u662f"],
+      ids: [0, 31805, 11140, 14597, 8097, 2],
+      decoded: "<s> \u751f\u6d3b\u7684 \u771f \u8c1b \u662f </s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["leading", "space"],
+      ids: [0, 23462, 22283, 2],
+      decoded: "<s> leading space </s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["trailing", "space"],
+      ids: [0, 52572, 22283, 2],
+      decoded: "<s> trailing space </s>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["hi", "hello"],
+      ids: [0, 23233, 28687, 2],
+      decoded: "<s> hi hello </s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "$", "1", "r2", "#", "3", "\u20ac", "4", "\u00a3", "5", "\u00a5", "6", "<unk>", "7", "\u20b9", "8", "\u20b1", "9", "test"],
+      ids: [0, 21828, 34, 47, 46925, 33, 49, 2155, 50, 133, 51, 135, 52, 3, 53, 2159, 54, 2157, 55, 21828, 2],
+      decoded: "<s> test $ 1 r2 # 3 \u20ac 4 \u00a3 5 \u00a5 6 <unk> 7 \u20b9 8 \u20b1 9 test </s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["i", "bought", "an", "apple", "for", "$", "1", ".", "00", "at", "the", "store", "."],
+      ids: [0, 77, 25474, 21136, 24208, 21169, 34, 47, 44, 21298, 21141, 21138, 22657, 44, 2],
+      decoded: "<s> i bought an apple for $ 1. 00 at the store. </s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u2026"],
+      ids: [0, 21166, 2091, 2],
+      decoded: "<s> you \u2026 </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u2026"],
+      ids: [0, 21166, 2091, 2],
+      decoded: "<s> you \u2026 </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u2026", "you", "\u2026"],
+      ids: [0, 21166, 2091, 21166, 2091, 2],
+      decoded: "<s> you \u2026 you \u2026 </s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["weird", "\uff5e", "edge", "\uff5e", "case"],
+      ids: [0, 31376, 20249, 24273, 20249, 22111, 2],
+      decoded: "<s> weird \uff5e edge \uff5e case </s>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581", "this", "\u2581", "is", "\u2581", "a", "\u2581", "test", "\u2581", "."],
+      ids: [0, 2541, 21225, 2541, 21152, 2541, 69, 2541, 21828, 2541, 44, 2],
+      decoded: "<s> \u2581 this \u2581 is \u2581 a \u2581 test \u2581. </s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\ud83d\ude02", "\ud83d\udc4d", "\ud83e\udd23", "\ud83d\ude0d", "\ud83d\ude2d", "\ud83c\udf89", "\ud83d\ude4f", "\ud83d\ude0a", "\ud83d\udd25", "\ud83d\ude01", "\ud83d\ude05", "\ud83e\udd17", "\ud83d\ude06", "\ud83d\udc4f", "\u2764", "\ufe0f", "\ud83d\udc9c", "\ud83d\udc9a", "\ud83d\udc97", "\ud83d\udc99", "\ud83d\udda4", "\ud83d\ude0e", "\ud83d\udc4c", "\ud83e\udd73", "\ud83d\udcaa", "\u2728", "\ud83d\udc49", "\ud83d\udc40", "\ud83d\udcaf", "\ud83c\udf88", "\ud83d\ude48", "\ud83d\ude4c", "\ud83d\udc80", "\ud83d\udc47", "\ud83d\udc4b", "\u2705", "\ud83c\udf81", "\ud83c\udf1e", "\ud83c\udf38", "\ud83d\udcb0"],
+      ids: [0, 20904, 20749, 21000, 20915, 20943, 20645, 20964, 20912, 20879, 20903, 20907, 20992, 20908, 20751, 2781, 20133, 20807, 20805, 20802, 20804, 20898, 20916, 20748, 21031, 20817, 2742, 20745, 20738, 20821, 20644, 20961, 20963, 20784, 20743, 20747, 2720, 20637, 20574, 20588, 20822, 2],
+      decoded: "<s> \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764 \ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0 </s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2728", "\ud83e\udd17", "<unk>", "\ufe0f", "\ud83d\udc71", "\ud83c\udffb", "\ud83d\udd75", "\u200d", "\u2642", "\ufe0f", "\ud83e\uddd9", "\ud83c\udffb", "\u200d", "\u2642", "\ud83d\udc68", "\ud83c\udffb", "\u200d", "\ud83c\udf3e", "\ud83e\uddd1", "\u200d", "\ud83e\udd1d", "\u200d", "\ud83e\uddd1", "\ud83d\udc69", "\u200d", "\u2764", "\u200d", "\ud83d\udc8b", "\u200d", "\ud83d\udc68", "\ud83d\udc69", "\u200d", "\ud83d\udc69", "\u200d", "\ud83d\udc67", "\u200d", "\ud83d\udc66", "\ud83e\uddd1", "\ud83c\udffb", "\u200d", "\ud83e\udd1d", "\u200d", "\ud83e\uddd1", "\ud83c\udffb", "\ud83c\udff4", "<unk>", "\udb40\udc62", "<unk>", "<unk>", "<unk>", "\udb40\udc7f", "\ud83d\udc68", "\ud83c\udffb", "\u200d", "\u2764", "\ufe0f", "\u200d", "\ud83d\udc8b", "\u200d", "\ud83d\udc68", "\ud83c\udffc"],
+      ids: [0, 2742, 20992, 3, 20133, 20775, 20700, 20894, 2067, 2662, 20133, 21050, 20700, 2067, 2662, 20768, 20700, 2067, 20593, 21049, 2067, 20995, 2067, 21049, 20769, 2067, 2781, 2067, 20792, 2067, 20768, 20769, 2067, 20769, 2067, 20767, 2067, 20766, 21049, 20700, 2067, 20995, 2067, 21049, 20700, 20697, 3, 21126, 3, 3, 3, 21130, 20768, 20700, 2067, 2781, 20133, 2067, 20792, 2067, 20768, 20701, 2],
+      decoded: "<s> \u2728 \ud83e\udd17 <unk> \ufe0f \ud83d\udc71 \ud83c\udffb \ud83d\udd75 \u200d \u2642 \ufe0f \ud83e\uddd9 \ud83c\udffb \u200d \u2642 \ud83d\udc68 \ud83c\udffb \u200d \ud83c\udf3e \ud83e\uddd1 \u200d \ud83e\udd1d \u200d \ud83e\uddd1 \ud83d\udc69 \u200d \u2764 \u200d \ud83d\udc8b \u200d \ud83d\udc68 \ud83d\udc69 \u200d \ud83d\udc69 \u200d \ud83d\udc67 \u200d \ud83d\udc66 \ud83e\uddd1 \ud83c\udffb \u200d \ud83e\udd1d \u200d \ud83e\uddd1 \ud83c\udffb \ud83c\udff4 <unk> \udb40\udc62 <unk> <unk> <unk> \udb40\udc7f \ud83d\udc68 \ud83c\udffb \u200d \u2764 \ufe0f \u200d \ud83d\udc8b \u200d \ud83d\udc68 \ud83c\udffc </s>",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["ah", "\u535a", "\u63a8", "zz"],
+      ids: [0, 22311, 4352, 7628, 24387, 2],
+      decoded: "<s> ah \u535a \u63a8 zz </s>",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["h", "\u00e9", "llo"],
+      ids: [0, 76, 173, 48932, 2],
+      decoded: "<s> h \u00e9 llo </s>",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["hello", "!", "how", "are", "you", "?"],
+      ids: [0, 28687, 31, 21431, 21182, 21166, 61, 2],
+      decoded: "<s> hello! how are you? </s>",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["h", "\u00e4", "llo", "!", "how", "are", "you", "?"],
+      ids: [0, 76, 168, 48932, 31, 21431, 21182, 21166, 61, 2],
+      decoded: "<s> h \u00e4 llo! how are you? </s>",
+    },
+  },
+  "jinaai/jina-embeddings-v2-base-zh": {
+    // https://huggingface.co/jinaai/jina-embeddings-v2-base-zh/discussions/16
+    // Slow vs. fast tokenizer mismatch
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0000", "2", "\ufffd", "3"],
+      ids: [0, 47, 5, 48, 20321, 49, 2],
+      decoded: "<s> 1 \u0000 2 \ufffd 3 </s>",
+    },
+  },
+  "Xenova/all-distilroberta-v1": {
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["01", "23", "45", "67", "89", "\u01200", "\u01201", "\u01202", "\u01203", "\u01204", "\u01205", "\u01206", "\u01207", "\u01208", "\u01209", "\u012010", "\u0120100", "\u01201000"],
+      ids: [0, 2663, 1922, 1898, 4111, 5046, 321, 112, 132, 155, 204, 195, 231, 262, 290, 361, 158, 727, 10775, 2],
+      decoded: "<s>0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000</s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141", "\u00e6", "\u00b4", "\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e", "\u0141", "\u00e8", "\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [0, 48998, 37127, 20024, 2023, 44574, 49122, 4333, 36484, 7487, 3726, 48569, 2],
+      decoded: "<s>\u751f\u6d3b\u7684\u771f\u8c1b\u662f</s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120", "\u0120", "\u0120leading", "\u0120space"],
+      ids: [0, 1437, 1437, 981, 980, 2],
+      decoded: "<s>   leading space</s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "iling", "\u0120space", "\u0120", "\u0120", "\u0120"],
+      ids: [0, 9738, 7022, 980, 1437, 1437, 1437, 2],
+      decoded: "<s>trailing space   </s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2", "\u0124", "\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [0, 21959, 68, 134, 248, 176, 849, 246, 4480, 306, 984, 245, 30844, 401, 14333, 9264, 2469, 406, 14333, 9264, 9253, 398, 14333, 9264, 15389, 466, 1296, 2],
+      decoded: "<s>test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test</s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120", "\u0120"],
+      ids: [0, 6968, 1174, 1437, 1437, 2],
+      decoded: "<s>you\u2026  </s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u00f0\u0141\u013a", "\u0124", "\u0120\u00f0\u0141\u0133", "\u012f", "\u0120\u00f0\u0141", "\u00a4", "\u00a3", "\u0120\u00f0\u0141\u013a", "\u012f", "\u0120\u00f0\u0141\u013a", "\u0143", "\u0120\u00f0\u0141", "\u0130", "\u012b", "\u0120\u00f0\u0141", "\u013b", "\u0131", "\u0120\u00f0\u0141\u013a", "\u012c", "\u0120\u00f0\u0141", "\u0136", "\u00a5", "\u0120\u00f0\u0141\u013a", "\u0123", "\u0120\u00f0\u0141", "\u013a\u0127", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u013a", "\u0128", "\u0120\u00f0\u0141\u0133", "\u0131", "\u0120\u00e2\u013f", "\u00a4", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141", "\u0134", "\u013e", "\u0120\u00f0\u0141", "\u0134", "\u013c", "\u0120\u00f0\u0141", "\u0134", "\u0139", "\u0120\u00f0\u0141", "\u0134", "\u013b", "\u0120\u00f0\u0141", "\u0138", "\u00a4", "\u0120\u00f0\u0141\u013a", "\u0130", "\u0120\u00f0\u0141\u0133", "\u012e", "\u0120\u00f0\u0141", "\u00a5", "\u00b3", "\u0120\u00f0\u0141", "\u0134", "\u00aa", "\u0120\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141\u0133", "\u012b", "\u0120\u00f0\u0141\u0133", "\u0122", "\u0120\u00f0\u0141", "\u0134", "\u00af", "\u0120\u00f0\u0141", "\u0130", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012e", "\u0120\u00f0\u0141", "\u0134", "\u0122", "\u0120\u00f0\u0141\u0133", "\u0129", "\u0120\u00f0\u0141\u0133", "\u012d", "\u0120\u00e2\u013e", "\u0127", "\u0120\u00f0\u0141", "\u0130", "\u0123", "\u0120\u00f0\u0141", "\u012e", "\u0140", "\u0120\u00f0\u0141", "\u012e", "\u00b8", "\u0120\u00f0\u0141", "\u0134", "\u00b0"],
+      ids: [0, 18636, 9264, 26964, 8384, 8103, 10470, 2469, 17841, 8384, 17841, 12410, 8103, 12736, 23171, 8103, 27, 9357, 17841, 27969, 8103, 10674, 8210, 17841, 10172, 8103, 48278, 8103, 10470, 6800, 17841, 27819, 26964, 9357, 28775, 10470, 12605, 8103, 10659, 48, 8103, 10659, 15113, 8103, 10659, 6800, 8103, 10659, 27, 8103, 25448, 10470, 17841, 12736, 26964, 14285, 8103, 8210, 15264, 8103, 10659, 10278, 36174, 11423, 26964, 23171, 26964, 7471, 8103, 10659, 10965, 8103, 12736, 23133, 8103, 27, 23133, 8103, 27, 14285, 8103, 10659, 7471, 26964, 6382, 26964, 13859, 36174, 5782, 8103, 12736, 10172, 8103, 14285, 17772, 8103, 14285, 18537, 8103, 10659, 7487, 2],
+      decoded: "<s>\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0</s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u00e2\u013e", "\u00a8", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u0133", "\u0123", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141\u0133", "\u00b1", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0137", "\u00b5", "\u00e2\u0122", "\u012f", "\u00e2\u013b", "\u0124", "\u00ef\u00b8\u0131", "\u0120\u00f0\u0141", "\u00a7", "\u013b", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00e2\u013b", "\u0124", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u012e", "\u00be", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00e2\u013f", "\u00a4", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0134", "\u012d", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0133", "\u00a8", "\u0120\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0133", "\u00a7", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0133", "\u00a6", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0131", "\u00b4", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00a2", "\u00f3", "\u0142", "\u0123", "\u00a5", "\u00f3", "\u0142", "\u0123", "\u00ae", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00bf", "\u0120\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00e2\u013f", "\u00a4", "\u00ef\u00b8\u0131", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0134", "\u012d", "\u00e2\u0122", "\u012f", "\u00f0\u0141\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bc"],
+      ids: [0, 39817, 11423, 8103, 10470, 6800, 26964, 10172, 12605, 26964, 15389, 6569, 9357, 2023, 8103, 15722, 8906, 17, 8384, 38718, 9264, 12605, 8103, 6248, 27, 6569, 9357, 2023, 17, 8384, 38718, 9264, 26964, 11423, 6569, 9357, 2023, 17, 8384, 6569, 14285, 4726, 8103, 6248, 3602, 17, 8384, 6569, 10470, 46, 17, 8384, 6569, 6248, 3602, 26964, 15375, 17, 8384, 30151, 10470, 17, 8384, 6569, 10659, 13859, 17, 8384, 31193, 11423, 26964, 15375, 17, 8384, 31193, 15375, 17, 8384, 31193, 6248, 17, 8384, 31193, 18164, 8103, 6248, 3602, 6569, 9357, 2023, 17, 8384, 6569, 10470, 46, 17, 8384, 6569, 6248, 3602, 6569, 9357, 2023, 8103, 9357, 20024, 49078, 21402, 10172, 6248, 49078, 21402, 10172, 7258, 49078, 21402, 10172, 8210, 49078, 21402, 10172, 2840, 49078, 21402, 10172, 6248, 49078, 21402, 10172, 9470, 26964, 11423, 6569, 9357, 2023, 17, 8384, 30151, 10470, 12605, 17, 8384, 6569, 10659, 13859, 17, 8384, 31193, 11423, 6569, 9357, 4394, 2],
+      decoded: "<s>\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc</s>",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["ah", "\u00e5\u012f", "\u013c", "\u00e6", "\u0130", "\u00a8", "zz"],
+      ids: [0, 895, 47658, 15113, 37127, 12736, 11423, 7399, 2],
+      decoded: "<s>ah\u535a\u63a8zz</s>",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["\u0120", "\u0109", "He", "LL", "o", "!", "how", "\u0120", "\u0120", "\u010a", "\u0120Are", "\u0120yo", "U", "?", "\u0120", "\u0120"],
+      ids: [0, 1437, 50117, 894, 6006, 139, 328, 9178, 1437, 1437, 50118, 3945, 25610, 791, 116, 1437, 1437, 2],
+      decoded: "<s> \tHeLLo!how  \n Are yoU?  </s>",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["\u0120", "\u0109", "H", "\u00c3\u00a4", "LL", "o", "!", "how", "\u0120", "\u0120", "\u010a", "\u0120Are", "\u0120yo", "U", "?", "\u0120", "\u0120"],
+      ids: [0, 1437, 50117, 725, 1561, 6006, 139, 328, 9178, 1437, 1437, 50118, 3945, 25610, 791, 116, 1437, 1437, 2],
+      decoded: "<s> \tH\u00e4LLo!how  \n Are yoU?  </s>",
+    },
+  },
+  "Xenova/EsperBERTo-small-pos": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u0120are", "\u0120you", "\u0120do", "ing", "?"],
+      ids: [0, 50702, 1694, 12426, 661, 948, 35, 2],
+      decoded: "<s>How are you doing?</s>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120sho", "uld", "'ve", "\u0120don", "e", "\u0120this"],
+      ids: [0, 36894, 21906, 8512, 6091, 851, 73, 18955, 2],
+      decoded: "<s>You should've done this</s>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "123456", "78", "9", "\u01200", "\u01201", "\u01202", "\u01203", "\u01204", "\u01205", "\u01206", "\u01207", "\u01208", "\u01209", "\u012010", "\u0120100", "\u01201000"],
+      ids: [0, 20, 11816, 6229, 29, 2042, 355, 411, 620, 818, 839, 1031, 1142, 1166, 1274, 1312, 2450, 8403, 2],
+      decoded: "<s>0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000</s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120c", "ompan", "y", "\u0120was", "\u0120fo", "und", "ed", "\u0120in", "\u01202016", "."],
+      ids: [0, 5490, 467, 2833, 93, 30687, 1204, 3936, 347, 327, 3653, 18, 2],
+      decoded: "<s>The company was founded in 2016.</s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!", "!", "to", "?", "'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [0, 37, 203, 17792, 4883, 5, 288, 35, 11, 72, 15271, 72, 682, 16, 15597, 3761, 18, 2],
+      decoded: "<s>A\n'll!!to?'d''d of, can't.</s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["de", "f", "\u0120ma", "in", "(", "):", "\u010a", "\u0109", "pas", "s"],
+      ids: [0, 387, 74, 633, 282, 12, 3914, 203, 202, 1208, 87, 2],
+      decoded: "<s>def main():\n\tpass</s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120ob", "j", ".", "to", "S", "tr", "ing", "(", ");", "\u010a", "to", "S", "tr", "ing", "(", ");"],
+      ids: [0, 5745, 278, 3945, 1080, 78, 18, 288, 55, 497, 948, 12, 3429, 203, 288, 55, 497, 948, 12, 3429, 2],
+      decoded: "<s>let a = obj.toString();\ntoString();</s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "w", "ant", "\u00c3\u00a9", "d", ",", "run", "ning"],
+      ids: [0, 7390, 91, 1799, 1174, 72, 16, 1307, 13715, 2],
+      decoded: "<s>UNwant\u00e9d,running</s>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hel", "lo", "\u0120World"],
+      ids: [0, 4152, 310, 7717, 2],
+      decoded: "<s>Hello World</s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hel", "lo", "\u0120wor", "ld"],
+      ids: [0, 1686, 310, 39013, 3580, 2],
+      decoded: "<s>hello world</s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136", "\u0141", "\u00e6", "\u00b4", "\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e", "\u0141", "\u00e8", "\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [0, 25856, 258, 167, 117, 124, 19584, 42803, 258, 169, 113, 254, 34946, 2],
+      decoded: "<s>\u751f\u6d3b\u7684\u771f\u8c1b\u662f</s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120\u0120", "\u0120le", "ading", "\u0120space"],
+      ids: [0, 2399, 591, 30214, 51965, 2],
+      decoded: "<s>   leading space</s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "i", "ling", "\u0120space", "\u0120\u0120", "\u0120"],
+      ids: [0, 440, 77, 879, 51965, 2399, 225, 2],
+      decoded: "<s>trailing space   </s>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u0120", "\u0120Hel", "lo"],
+      ids: [0, 15893, 225, 3558, 310, 2],
+      decoded: "<s>Hi  Hello</s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2", "\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2", "\u0124", "\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [0, 30747, 9416, 21, 462, 22, 4668, 23, 7537, 24, 12407, 25, 790, 103, 26, 2097, 229, 101, 27, 2097, 229, 122, 28, 2097, 229, 114, 29, 18885, 2],
+      decoded: "<s>test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test</s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bo", "u", "ght", "\u0120an", "\u0120ap", "ple", "\u0120for", "\u0120$", "1", ".", "00", "\u0120at", "\u0120the", "\u0120s", "tore", "."],
+      ids: [0, 45, 1716, 89, 8840, 353, 560, 720, 434, 9416, 21, 18, 455, 3993, 2814, 275, 14003, 18, 2],
+      decoded: "<s>I bought an apple for $1.00 at the store.</s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142"],
+      ids: [0, 9642, 1322, 131, 259, 131, 259, 2],
+      decoded: "<s>you\u2026\u00a0\u00a0</s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142"],
+      ids: [0, 9642, 1322, 131, 259, 131, 259, 9642, 1322, 131, 259, 131, 259, 2],
+      decoded: "<s>you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0</s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ir", "d", "\u0120", "\u00ef\u00bd", "\u0140", "\u0120ed", "ge", "\u0120", "\u00ef\u00bd", "\u0140", "\u0120c", "ase"],
+      ids: [0, 4983, 861, 72, 225, 30624, 257, 1263, 587, 225, 30624, 257, 467, 14285, 2],
+      decoded: "<s>weird \uff5e edge \uff5e case</s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u00f0\u0141\u013a", "\u0124", "\u0120\u00f0\u0141", "\u0133", "\u012f", "\u0120\u00f0\u0141", "\u00a4", "\u00a3", "\u0120\u00f0\u0141\u013a", "\u012f", "\u0120\u00f0\u0141\u013a", "\u0143", "\u0120\u00f0\u0141", "\u0130", "\u012b", "\u0120\u00f0\u0141", "\u013b", "\u0131", "\u0120\u00f0\u0141\u013a", "\u012c", "\u0120\u00f0\u0141", "\u0136", "\u00a5", "\u0120\u00f0\u0141\u013a", "\u0123", "\u0120\u00f0\u0141\u013a", "\u0127", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141\u013a", "\u0128", "\u0120\u00f0\u0141", "\u0133", "\u0131", "\u0120\u00e2", "\u013f", "\u00a4", "\u00ef\u00b8", "\u0131", "\u0120\u00f0\u0141", "\u0134", "\u013e", "\u0120\u00f0\u0141", "\u0134", "\u013c", "\u0120\u00f0\u0141", "\u0134", "\u0139", "\u0120\u00f0\u0141", "\u0134", "\u013b", "\u0120\u00f0\u0141", "\u0138", "\u00a4", "\u0120\u00f0\u0141\u013a", "\u0130", "\u0120\u00f0\u0141", "\u0133", "\u012e", "\u0120\u00f0\u0141", "\u00a5", "\u00b3", "\u0120\u00f0\u0141", "\u0134", "\u00aa", "\u0120\u00e2", "\u013e", "\u00a8", "\u0120\u00f0\u0141", "\u0133", "\u012b", "\u0120\u00f0\u0141", "\u0133", "\u0122", "\u0120\u00f0\u0141", "\u0134", "\u00af", "\u0120\u00f0\u0141", "\u0130", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012a", "\u0120\u00f0\u0141", "\u013b", "\u012e", "\u0120\u00f0\u0141", "\u0134", "\u0122", "\u0120\u00f0\u0141", "\u0133", "\u0129", "\u0120\u00f0\u0141", "\u0133", "\u012d", "\u0120\u00e2", "\u013e", "\u0127", "\u0120\u00f0\u0141", "\u0130", "\u0123", "\u0120\u00f0\u0141", "\u012e", "\u0140", "\u0120\u00f0\u0141", "\u012e", "\u00b8", "\u0120\u00f0\u0141", "\u0134", "\u00b0"],
+      ids: [0, 10626, 229, 32340, 244, 240, 32340, 102, 101, 30199, 240, 30199, 260, 32340, 241, 236, 32340, 252, 242, 30199, 237, 32340, 247, 103, 30199, 228, 30199, 232, 32340, 102, 250, 30199, 233, 32340, 244, 242, 2097, 256, 102, 27027, 242, 32340, 245, 255, 32340, 245, 253, 32340, 245, 250, 32340, 245, 252, 32340, 249, 102, 30199, 241, 32340, 244, 239, 32340, 103, 116, 32340, 245, 108, 2097, 255, 106, 32340, 244, 236, 32340, 244, 227, 32340, 245, 112, 32340, 241, 235, 32340, 252, 235, 32340, 252, 239, 32340, 245, 227, 32340, 244, 234, 32340, 244, 238, 2097, 255, 232, 32340, 241, 228, 32340, 239, 257, 32340, 239, 121, 32340, 245, 113, 2],
+      decoded: "<s>\ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c \ud83e\udd73 \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0</s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u00e2", "\u013e", "\u00a8", "\u0120\u00f0\u0141", "\u00a4", "\u0139", "\u0120\u00f0\u0141", "\u0133", "\u0123", "\u00ef\u00b8", "\u0131", "\u0120\u00f0\u0141", "\u0133", "\u00b1", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0137", "\u00b5", "\u00e2\u0122", "\u012f", "\u00e2\u013b", "\u0124", "\u00ef\u00b8", "\u0131", "\u0120\u00f0\u0141", "\u00a7", "\u013b", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00e2\u013b", "\u0124", "\u0120\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u012e", "\u00be", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u0120\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00e2", "\u013f", "\u00a4", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0134", "\u012d", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u0120\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a9", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a7", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a6", "\u0120\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a4", "\u013f", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u00a7", "\u0133", "\u00f0\u0141", "\u0131", "\u00bb", "\u0120\u00f0\u0141", "\u0131", "\u00b4", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00a2", "\u00f3", "\u0142", "\u0123", "\u00a5", "\u00f3", "\u0142", "\u0123", "\u00ae", "\u00f3", "\u0142", "\u0123", "\u00a7", "\u00f3", "\u0142", "\u0123", "\u00bf", "\u0120\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bb", "\u00e2\u0122", "\u012f", "\u00e2", "\u013f", "\u00a4", "\u00ef\u00b8", "\u0131", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0134", "\u012d", "\u00e2\u0122", "\u012f", "\u00f0\u0141", "\u0133", "\u00a8", "\u00f0\u0141", "\u0131", "\u00bc"],
+      ids: [0, 163, 255, 106, 32340, 102, 250, 32340, 244, 228, 27027, 242, 32340, 244, 114, 9132, 242, 124, 32340, 248, 118, 348, 240, 20419, 229, 27027, 242, 32340, 105, 252, 9132, 242, 124, 348, 240, 20419, 229, 32340, 244, 106, 9132, 242, 124, 348, 240, 9132, 239, 127, 32340, 105, 244, 348, 240, 9132, 102, 256, 348, 240, 9132, 105, 244, 32340, 244, 107, 348, 240, 163, 256, 102, 348, 240, 9132, 245, 238, 348, 240, 9132, 244, 106, 32340, 244, 107, 348, 240, 9132, 244, 107, 348, 240, 9132, 244, 105, 348, 240, 9132, 244, 104, 32340, 105, 244, 9132, 242, 124, 348, 240, 9132, 102, 256, 348, 240, 9132, 105, 244, 9132, 242, 124, 32340, 242, 117, 180, 259, 228, 105, 180, 259, 228, 100, 180, 259, 228, 103, 180, 259, 228, 111, 180, 259, 228, 105, 180, 259, 228, 128, 32340, 244, 106, 9132, 242, 124, 348, 240, 163, 256, 102, 27027, 242, 348, 240, 9132, 245, 238, 348, 240, 9132, 244, 106, 9132, 242, 125, 2],
+      decoded: "<s>\u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75\u200d\u2642\ufe0f \ud83e\uddd9\ud83c\udffb\u200d\u2642 \ud83d\udc68\ud83c\udffb\u200d\ud83c\udf3e \ud83e\uddd1\u200d\ud83e\udd1d\u200d\ud83e\uddd1 \ud83d\udc69\u200d\u2764\u200d\ud83d\udc8b\u200d\ud83d\udc68 \ud83d\udc69\u200d\ud83d\udc69\u200d\ud83d\udc67\u200d\ud83d\udc66 \ud83e\uddd1\ud83c\udffb\u200d\ud83e\udd1d\u200d\ud83e\uddd1\ud83c\udffb \ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f \ud83d\udc68\ud83c\udffb\u200d\u2764\ufe0f\u200d\ud83d\udc8b\u200d\ud83d\udc68\ud83c\udffc</s>",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["\u0120", "\u0109", "He", "L", "Lo", "!", "ho", "w", "\u0120\u0120", "\u010a", "\u0120Are", "\u0120yo", "U", "?", "\u0120\u0120"],
+      ids: [0, 225, 202, 13029, 48, 4876, 5, 882, 91, 2399, 203, 31676, 27961, 57, 35, 2399, 2],
+      decoded: "<s> \tHeLLo!how  \n Are yoU?  </s>",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["\u0120", "\u0109", "H", "\u00c3\u00a4", "L", "Lo", "!", "ho", "w", "\u0120\u0120", "\u010a", "\u0120Are", "\u0120yo", "U", "?", "\u0120\u0120"],
+      ids: [0, 225, 202, 44, 3203, 48, 4876, 5, 882, 91, 2399, 203, 31676, 27961, 57, 35, 2399, 2],
+      decoded: "<s> \tH\u00e4LLo!how  \n Are yoU?  </s>",
+    },
+  },
+};
diff --git a/tests/models/t5/tokenization.js b/tests/models/t5/tokenization.js
new file mode 100644
index 000000000..43e7a0fd9
--- /dev/null
+++ b/tests/models/t5/tokenization.js
@@ -0,0 +1,296 @@
+import { T5Tokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, SENTENCEPIECE_TEST_STRINGS, T5_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = T5Tokenizer;
+export const TEST_CONFIG = {
+  "Xenova/t5-small": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["\u2581How", "\u2581are", "\u2581you", "\u2581doing", "?"],
+      ids: [571, 33, 25, 692, 58, 1],
+      decoded: "How are you doing?</s>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["\u2581You", "\u2581should", "'", "ve", "\u2581done", "\u2581this"],
+      ids: [148, 225, 31, 162, 612, 48, 1],
+      decoded: "You should've done this</s>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["\u258101", "23", "45", "67", "89", "\u2581", "0", "\u25811", "\u25812", "\u25813", "\u25814", "\u25815", "\u25816", "\u25817", "\u25818", "\u25819", "\u258110", "\u2581100", "\u25811000"],
+      ids: [7088, 2773, 2128, 3708, 3914, 3, 632, 209, 204, 220, 314, 305, 431, 489, 505, 668, 335, 910, 5580, 1],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000</s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u2581The", "\u2581company", "\u2581was", "\u2581founded", "\u2581in", "\u25812016."],
+      ids: [37, 349, 47, 5710, 16, 4619, 1],
+      decoded: "The company was founded in 2016.</s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581A", "\u2581", "'", "ll", "\u2581", "!!", "to", "?", "'", "d", "'", "'", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [71, 3, 31, 195, 3, 1603, 235, 58, 31, 26, 31, 31, 26, 13, 6, 54, 31, 17, 5, 1],
+      decoded: "A 'll!!to?'d''d of, can't.</s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u2581de", "f", "\u2581main", "()", ":", "\u2581pass"],
+      ids: [20, 89, 711, 9960, 10, 1903, 1],
+      decoded: "def main(): pass</s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u2581let", "\u2581", "a", "\u2581=", "\u2581", "o", "b", "j", ".", "to", "Str", "ing", "()", ";", "\u2581to", "Str", "ing", "()", ";"],
+      ids: [752, 3, 9, 3274, 3, 32, 115, 354, 5, 235, 11500, 53, 9960, 117, 12, 11500, 53, 9960, 117, 1],
+      decoded: "let a = obj.toString(); toString();</s>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["\u2581This", "\u2581is", "\u2581", "a", "\u2581test", "."],
+      ids: [100, 19, 3, 9, 794, 5, 1],
+      decoded: "This is a test.</s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u2581UN", "wan", "t\u00e9", "d", ",", "running"],
+      ids: [4417, 3877, 2229, 26, 6, 24549, 1],
+      decoded: "UNwant\u00e9d,running</s>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["\u25811", "\u0000", "2", "\u25813"],
+      ids: [209, 2, 357, 220, 1],
+      decoded: "1<unk>2 3</s>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["\u2581Hello", "\u2581World"],
+      ids: [8774, 1150, 1],
+      decoded: "Hello World</s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["\u2581hello", "\u2581world"],
+      ids: [21820, 296, 1],
+      decoded: "hello world</s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581", "\u751f\u6d3b\u7684\u771f\u8c1b\u662f"],
+      ids: [3, 2, 1],
+      decoded: "<unk></s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581leading", "\u2581space"],
+      ids: [1374, 628, 1],
+      decoded: "leading space</s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trail", "ing", "\u2581space"],
+      ids: [5032, 53, 628, 1],
+      decoded: "trailing space</s>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["\u2581Hi", "\u2581Hello"],
+      ids: [2018, 8774, 1],
+      decoded: "Hi Hello</s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$1", "\u2581R", "2", "\u2581#3", "\u2581\u20ac", "4", "\u2581\u00a35", "\u2581", "\u00a5", "6", "\u2581", "\u20a3", "7", "\u2581", "\u20b9", "8", "\u2581", "\u20b1", "9", "\u2581test"],
+      ids: [794, 1970, 391, 357, 20206, 3416, 591, 23978, 3, 2, 948, 3, 2, 940, 3, 2, 927, 3, 2, 1298, 794, 1],
+      decoded: "test $1 R2 #3 \u20ac4 \u00a35 <unk>6 <unk>7 <unk>8 <unk>9 test</s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581I", "\u2581bought", "\u2581an", "\u2581apple", "\u2581for", "\u2581$1", ".00", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [27, 2944, 46, 8947, 21, 1970, 4200, 44, 8, 1078, 5, 1],
+      decoded: "I bought an apple for $1.00 at the store.</s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "..."],
+      ids: [25, 233, 1],
+      decoded: "you...</s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "..."],
+      ids: [25, 233, 1],
+      decoded: "you...</s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "...", "\u2581you", "..."],
+      ids: [25, 233, 25, 233, 1],
+      decoded: "you... you...</s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581weird", "\u2581", "\uff5e", "\u2581edge", "\u2581", "\uff5e", "\u2581case"],
+      ids: [10088, 3, 2, 3023, 3, 2, 495, 1],
+      decoded: "weird <unk> edge <unk> case</s>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581This", "\u2581is", "\u2581", "a", "\u2581test", "\u2581", "."],
+      ids: [100, 19, 3, 9, 794, 3, 5, 1],
+      decoded: "This is a test.</s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581", "\ud83d\ude02", "\u2581", "\ud83d\udc4d", "\u2581", "\ud83e\udd23", "\u2581", "\ud83d\ude0d", "\u2581", "\ud83d\ude2d", "\u2581", "\ud83c\udf89", "\u2581", "\ud83d\ude4f", "\u2581", "\ud83d\ude0a", "\u2581", "\ud83d\udd25", "\u2581", "\ud83d\ude01", "\u2581", "\ud83d\ude05", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\ude06", "\u2581", "\ud83d\udc4f", "\u2581", "\u2764\ufe0f", "\u2581", "\ud83d\udc9c", "\u2581", "\ud83d\udc9a", "\u2581", "\ud83d\udc97", "\u2581", "\ud83d\udc99", "\u2581", "\ud83d\udda4", "\u2581", "\ud83d\ude0e", "\u2581", "\ud83d\udc4c", "\u2581", "\ud83e\udd73", "\u2581", "\ud83d\udcaa", "\u2581", "\u2728", "\u2581", "\ud83d\udc49", "\u2581", "\ud83d\udc40", "\u2581", "\ud83d\udcaf", "\u2581", "\ud83c\udf88", "\u2581", "\ud83d\ude48", "\u2581", "\ud83d\ude4c", "\u2581", "\ud83d\udc80", "\u2581", "\ud83d\udc47", "\u2581", "\ud83d\udc4b", "\u2581", "\u2705", "\u2581", "\ud83c\udf81", "\u2581", "\ud83c\udf1e", "\u2581", "\ud83c\udf38", "\u2581", "\ud83d\udcb0"],
+      ids: [3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1],
+      decoded: "<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk></s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "\u2728", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\udc41\ufe0f", "\u2581", "\ud83d\udc71\ud83c\udffb", "\u2581", "\ud83d\udd75", "\u2581", "\u2642\ufe0f", "\u2581", "\ud83e\uddd9\ud83c\udffb", "\u2581", "\u2642", "\u2581", "\ud83d\udc68\ud83c\udffb", "\u2581", "\ud83c\udf3e", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83d\udc69", "\u2581", "\u2764", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc67", "\u2581", "\ud83d\udc66", "\u2581", "\ud83e\uddd1\ud83c\udffb", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1\ud83c\udffb", "\u2581", "\ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f", "\u2581", "\ud83d\udc68\ud83c\udffb", "\u2581", "\u2764\ufe0f", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68\ud83c\udffc"],
+      ids: [3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 1],
+      decoded: "<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk></s>",
+    },
+    SPECIAL_WITH_TRAILING_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_WITH_TRAILING_WHITESPACE,
+      tokens: ["\u2581", "<", "s", ">"],
+      ids: [3, 2, 7, 3155, 1],
+      decoded: "<unk>s></s>",
+    },
+    SPECIAL_SURROUNDED_BY_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_SURROUNDED_BY_WHITESPACE,
+      tokens: ["</s>", "\u2581test", "</s>"],
+      ids: [1, 794, 1, 1],
+      decoded: "</s> test</s></s>",
+    },
+    SPECIAL_NO_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_NO_WHITESPACE,
+      tokens: ["</s>", "\u2581test", "</s>"],
+      ids: [1, 794, 1, 1],
+      decoded: "</s> test</s></s>",
+    },
+    PREPEND_SCHEME: {
+      text: T5_TEST_STRINGS.PREPEND_SCHEME,
+      tokens: ["\u2581Hey", "</s>", "\u2581", ".", "\u2581how", "\u2581are", "\u2581you"],
+      ids: [9459, 1, 3, 5, 149, 33, 25, 1],
+      decoded: "Hey</s>. how are you</s>",
+    },
+  },
+  "Xenova/t5-tokenizer-new": {
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trail", "ing", "\u2581space", "\u2581"],
+      ids: [5032, 53, 628, 3, 1],
+      decoded: "trailing space </s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "...", "\u2581"],
+      ids: [25, 233, 3, 1],
+      decoded: "you... </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "...", "\u2581"],
+      ids: [25, 233, 3, 1],
+      decoded: "you... </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "...", "\u2581you", "...", "\u2581"],
+      ids: [25, 233, 25, 233, 3, 1],
+      decoded: "you... you... </s>",
+    },
+    SPECIAL_WITH_TRAILING_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_WITH_TRAILING_WHITESPACE,
+      tokens: ["\u2581", "<", "s", ">", "\u2581"],
+      ids: [3, 2, 7, 3155, 3, 1],
+      decoded: "<unk>s> </s>",
+    },
+    SPECIAL_SURROUNDED_BY_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_SURROUNDED_BY_WHITESPACE,
+      tokens: ["\u2581", "</s>", "\u2581test", "\u2581", "</s>", "\u2581"],
+      ids: [3, 1, 794, 3, 1, 3, 1],
+      decoded: "</s> test </s> </s>",
+    },
+    SPECIAL_NO_WHITESPACE: {
+      text: SENTENCEPIECE_TEST_STRINGS.SPECIAL_NO_WHITESPACE,
+      tokens: ["</s>", "test", "</s>"],
+      ids: [1, 4377, 1, 1],
+      decoded: "</s>test</s></s>",
+    },
+    PREPEND_SCHEME: {
+      text: T5_TEST_STRINGS.PREPEND_SCHEME,
+      tokens: ["\u2581Hey", "\u2581", "</s>", ".", "\u2581how", "\u2581are", "\u2581you"],
+      ids: [9459, 3, 1, 5, 149, 33, 25, 1],
+      decoded: "Hey </s>. how are you</s>",
+    },
+  },
+  "Xenova/LaMini-Flan-T5-783M": {
+    PREPEND_SCHEME: {
+      text: T5_TEST_STRINGS.PREPEND_SCHEME,
+      tokens: ["\u2581Hey", "\u2581", "</s>", "\u2581", ".", "\u2581how", "\u2581are", "\u2581you"],
+      ids: [9459, 3, 1, 3, 5, 149, 33, 25, 1],
+      decoded: "Hey </s>. how are you</s>",
+    },
+  },
+};
+
+// Test that tokenizer type can be inferred (`type: "Unigram"` is missing)
+TEST_CONFIG["google-t5/t5-small"] = TEST_CONFIG["Xenova/t5-small"];
+
+const MAX_EXECUTION_TIME = 10_000;
+export const CUSTOM_TESTS = () => {
+  // Tests to ensure that no matter what, the correct tokenization is returned.
+  // This is necessary since there are sometimes bugs in the transformers library.
+  describe("hard-coded", () => {
+    const TESTS = {
+      // legacy=false
+      "Xenova/t5-tokenizer-new": [
+        {
+          data: {
+            // https://github.com/huggingface/transformers/pull/26678
+            // ['▁Hey', '▁', '</s>', '.', '▁how', '▁are', '▁you']
+            "Hey </s>. how are you": [9459, 3, 1, 5, 149, 33, 25],
+          },
+          reversible: true,
+          legacy: null,
+        },
+        {
+          data: {
+            "</s>\n": [1, 3],
+            "A\n'll": [71, 3, 31, 195],
+          },
+          reversible: false,
+          legacy: null,
+        },
+      ],
+    };
+
+    for (const [tokenizerName, test_data] of Object.entries(TESTS)) {
+      it(
+        tokenizerName,
+        async () => {
+          for (const { data, reversible, legacy } of test_data) {
+            const tokenizer = await T5Tokenizer.from_pretrained(tokenizerName, { legacy });
+
+            for (const [text, expected] of Object.entries(data)) {
+              const token_ids = tokenizer.encode(text, { add_special_tokens: false });
+              expect(token_ids).toEqual(expected);
+
+              // If reversible, test that decoding produces the original text
+              if (reversible) {
+                const decoded = tokenizer.decode(token_ids);
+                expect(decoded).toEqual(text);
+              }
+            }
+          }
+        },
+        MAX_EXECUTION_TIME,
+      );
+    }
+  });
+};
diff --git a/tests/models/test_strings.js b/tests/models/test_strings.js
new file mode 100644
index 000000000..80793c9ad
--- /dev/null
+++ b/tests/models/test_strings.js
@@ -0,0 +1,115 @@
+export const BASE_TEST_STRINGS = {
+  SIMPLE: "How are you doing?",
+  SIMPLE_WITH_PUNCTUATION: "You should've done this",
+  NUMBERS: "0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000",
+  TEXT_WITH_NUMBERS: "The company was founded in 2016.",
+  PUNCTUATION: "A\n'll !!to?'d''d of, can't.",
+  PYTHON_CODE: "def main():\n\tpass",
+  JAVASCRIPT_CODE: "let a = obj.toString();\ntoString();",
+  NEWLINES: "This\n\nis\na\ntest.",
+  BASIC: "UNwant\u00e9d,running",
+  CONTROL_TOKENS: "1\u00002\uFFFD3",
+  HELLO_WORLD_TITLECASE: "Hello World",
+  HELLO_WORLD_LOWERCASE: "hello world",
+  CHINESE_ONLY: "生活的真谛是",
+  LEADING_SPACE: "   leading space",
+  TRAILING_SPACE: "trailing space   ",
+  SURROUNDING_SPACE: "   surrounding space   ",
+  DOUBLE_SPACE: "Hi  Hello",
+  CURRENCY: "test $1 R2 #3 €4 £5 ¥6 ₣7 ₹8 ₱9 test",
+  CURRENCY_WITH_DECIMALS: "I bought an apple for $1.00 at the store.",
+  ELLIPSIS: "you…  ",
+  TEXT_WITH_ESCAPE_CHARACTERS: "\u0079\u006F\u0075\u2026\u00A0\u00A0",
+  TEXT_WITH_ESCAPE_CHARACTERS_2: "\u0079\u006F\u0075\u2026\u00A0\u00A0\u0079\u006F\u0075\u2026\u00A0\u00A0",
+  TILDE_NORMALIZATION: "weird \uFF5E edge \uFF5E case",
+  SPIECE_UNDERSCORE: "▁This ▁is ▁a ▁test ▁.",
+  POPULAR_EMOJIS: "😂 👍 🤣 😍 😭 🎉 🙏 😊 🔥 😁 😅 🤗 😆 👏 ❤️ 💜 💚 💗 💙 🖤 😎 👌 🥳 💪 ✨ 👉 👀 💯 🎈 🙈 🙌 💀 👇 👋 ✅ 🎁 🌞 🌸 💰",
+  MULTIBYTE_EMOJIS: "✨ 🤗 👁️ 👱🏻 🕵‍♂️ 🧙🏻‍♂ 👨🏻‍🌾 🧑‍🤝‍🧑 👩‍❤‍💋‍👨 👩‍👩‍👧‍👦 🧑🏻‍🤝‍🧑🏻 🏴󠁧󠁢󠁥󠁮󠁧󠁿 👨🏻‍❤️‍💋‍👨🏼", // 1 2 3 4 5 6 7 8 10 11 12 14 15
+  ONLY_WHITESPACE: " \t\n",
+};
+
+export const BERT_TEST_STRINGS = {
+  CHINESE_LATIN_MIXED: "ah\u535a\u63a8zz",
+  SIMPLE_WITH_ACCENTS: "H\u00e9llo",
+  MIXED_CASE_WITHOUT_ACCENTS: " \tHeLLo!how  \n Are yoU?  ",
+  MIXED_CASE_WITH_ACCENTS: " \tHäLLo!how  \n Are yoU?  ",
+};
+
+// SentencePiece-specific test cases
+export const SENTENCEPIECE_TEST_STRINGS = {
+  SPECIAL_WITH_TRAILING_WHITESPACE: "<s>\n",
+  SPECIAL_SURROUNDED_BY_WHITESPACE: " </s> test </s> ",
+  SPECIAL_NO_WHITESPACE: "</s>test</s>",
+};
+
+// Additional test-cases for the Llama tokenizer, adapted from
+// https://github.com/belladoreai/llama-tokenizer-js/blob/master/llama-tokenizer.js#L381-L452
+export const LLAMA_TEST_STRINGS = {
+  BPE_SCORES_PRIORITY_1: "grabbed",
+  BPE_SCORES_PRIORITY_2: " grabbed",
+  BPE_SCORES_PRIORITY_3: "           grabbed",
+  NEWLINE: "\n",
+  NEWLINES: "ax\n####\nboo",
+  NEWLINE_WITH_LEADING_SPACE: " \n",
+  TABS: "	tabs				out here",
+  NEWLINE_AND_TAB: "\n\t\n",
+  CHINESE_LETTER: "镇",
+  EMOJIS_1: "🦙",
+  EMOJIS_2: "🦙Ꙋ",
+  EMOJIS_3: "Ꙋ🦙",
+  PARAGRAPH: 'The llama (/ˈlɑːmə/; 🦙Spanish pronunciation: [ˈʎama]) (Lama glama) is a domesticated South American camelid, widely used as a meat and pack animal by Andean cultures since the Pre-Columbian era. Llamas are social animals and live with others as a herd. Their wool is soft and contains only a small amount of lanolin.[2] Llamas can learn simple tasks after a few repetitions. When using a pack, they can carry about 25 to 30% of their body weight for 8 to 13 km (5–8 miles).[3] The name llama (in the past also spelled "lama" or "glama") was adopted by European settlers from native Peruvians.[4] The ancestors of llamas are thought to have originated from the Great Plains of North America about 40 million years ago, and subsequently migrated to South America about three million years ago during the Great American Interchange. By the end of the last ice age (10,000–12,000 years ago), camelids were extinct in North America.[3] As of 2007, there were over seven million llamas and alpacas in South America and over 158,000 llamas and 100,000Ꙋ🦙 alpacas, descended from progenitors imported late in the 20th century, in the United States and Canada.[5] In Aymara mythology, llamas are important beings. The Heavenly Llama is said to drink water from the ocean and urinates as it rains.[6] According to Aymara eschatology, llamas will return to the water springs and lagoons where they come from at the end of time.[6]',
+  IGNORE_MERGES: "Ne için gittiğimi falan bilmiyordum, Washington'da belirtilen bir yere rapor vermem gerekiyordu.",
+};
+
+export const VITS_TEST_STRINGS = {
+  BASIC: "abcdefghijklmnopqrstuvwxyz01234567890",
+  // Special treatment of characters in certain language
+  SPECIAL_CHARACTERS: "ț ţ",
+};
+
+export const QWEN_TEST_STRINGS = {
+  PUNCTUATION_SPLIT: "i'm i'M i've i've i'Ve i'vE i'VE",
+};
+
+export const WHISPER_TEST_STRINGS = {
+  SPECIAL_TOKENS: "   <|startoftranscript|> <|en|>   ", // Tests lstrip+rstrip
+};
+
+export const BLENDERBOT_SMALL_TEST_STRINGS = {
+  SPECIAL_TOKENS: "__start__hello world__end__",
+  // The original (python) tokenizer simply joins by spaces (regardless of special tokens or not)
+  WHITESPACE_1: "__start__ hey __end__", // --> ... --> "__start__ hey __end__"
+  WHITESPACE_2: "__start__hey __end__", // --> ... --> "__start__ hey __end__"
+};
+
+export const T5_TEST_STRINGS = {
+  // Tests the new T5 tokenizer, which uses a different prepend_scheme for its pre_tokenizer:
+  // tokenizer._tokenizer.pre_tokenizer = Metaspace(add_prefix_space = True, replacement = "▁", prepend_scheme = "first")
+  // See https://github.com/huggingface/transformers/pull/26678 for more information.
+  //  - Old (incorrect): ['▁Hey', '▁', '</s>', '▁', '.', '▁how', '▁are', '▁you']
+  //  - New (correct):   ['▁Hey', '▁', '</s>', '.', '▁how', '▁are', '▁you']
+  PREPEND_SCHEME: "Hey </s>. how are you",
+};
+
+export const FALCON_TEST_STRINGS = {
+  // Special case for splitting on 3 numbers
+  NUMBERS_SPLIT: "12 and 123 and 1234",
+};
+
+export const ESM_TEST_STRINGS = {
+  // Special tokens
+  SPECIAL_TOKENS: "<unk><pad><mask><cls><eos><bos>",
+  // Actual protein sequences
+  PROTEIN_SEQUENCES_1: "ATTCCGATTCCGATTCCG",
+  PROTEIN_SEQUENCES_2: "ATTTCTCTCTCTCTCTGAGATCGATCGATCGAT",
+};
+
+export const BLOOM_TEST_STRINGS = {
+  END_OF_SENTENCE_PUNCTUATION: "test. test, test! test? test… test。 test， test、 test। test۔ test، test",
+};
+
+export const M2M_100_TEST_STRINGS = {
+  TRANSLATION_INPUTS: "__en__ hello world</s>",
+  HIDNI_TEXT: "जीवन एक चॉकलेट बॉक्स की तरह है।",
+  CHINESE_TEXT: "生活就像一盒巧克力。",
+};
diff --git a/tests/models/vits/tokenization.js b/tests/models/vits/tokenization.js
new file mode 100644
index 000000000..e051d6bf7
--- /dev/null
+++ b/tests/models/vits/tokenization.js
@@ -0,0 +1,76 @@
+import { VitsTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, VITS_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = VitsTokenizer;
+export const TEST_CONFIG = {
+  "Xenova/mms-tts-eng": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["k", "h", "k", "o", "k", "w", "k", " ", "k", "a", "k", "r", "k", "e", "k", " ", "k", "y", "k", "o", "k", "u", "k", " ", "k", "d", "k", "o", "k", "i", "k", "n", "k", "g", "k"],
+      ids: [0, 6, 0, 22, 0, 9, 0, 19, 0, 26, 0, 25, 0, 7, 0, 19, 0, 3, 0, 22, 0, 4, 0, 19, 0, 5, 0, 22, 0, 18, 0, 29, 0, 37, 0],
+      decoded: "how are you doing",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["k", "y", "k", "o", "k", "u", "k", " ", "k", "s", "k", "h", "k", "o", "k", "u", "k", "l", "k", "d", "k", "'", "k", "v", "k", "e", "k", " ", "k", "d", "k", "o", "k", "n", "k", "e", "k", " ", "k", "t", "k", "h", "k", "i", "k", "s", "k"],
+      ids: [0, 3, 0, 22, 0, 4, 0, 19, 0, 8, 0, 6, 0, 22, 0, 4, 0, 21, 0, 5, 0, 1, 0, 32, 0, 7, 0, 19, 0, 5, 0, 22, 0, 29, 0, 7, 0, 19, 0, 33, 0, 6, 0, 18, 0, 8, 0],
+      decoded: "you should've done this",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["k", "0", "k", "1", "k", "2", "k", "3", "k", "4", "k", "5", "k", "6", "k", " ", "k", "0", "k", " ", "k", "1", "k", " ", "k", "2", "k", " ", "k", "3", "k", " ", "k", "4", "k", " ", "k", "5", "k", " ", "k", "6", "k", " ", "k", " ", "k", " ", "k", " ", "k", "1", "k", "0", "k", " ", "k", "1", "k", "0", "k", "0", "k", " ", "k", "1", "k", "0", "k", "0", "k", "0", "k"],
+      ids: [0, 23, 0, 15, 0, 28, 0, 11, 0, 27, 0, 35, 0, 36, 0, 19, 0, 23, 0, 19, 0, 15, 0, 19, 0, 28, 0, 19, 0, 11, 0, 19, 0, 27, 0, 19, 0, 35, 0, 19, 0, 36, 0, 19, 0, 19, 0, 19, 0, 19, 0, 15, 0, 23, 0, 19, 0, 15, 0, 23, 0, 23, 0, 19, 0, 15, 0, 23, 0, 23, 0, 23, 0],
+      decoded: "0123456 0 1 2 3 4 5 6    10 100 1000",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["k", "t", "k", "h", "k", "e", "k", " ", "k", "c", "k", "o", "k", "m", "k", "p", "k", "a", "k", "n", "k", "y", "k", " ", "k", "w", "k", "a", "k", "s", "k", " ", "k", "f", "k", "o", "k", "u", "k", "n", "k", "d", "k", "e", "k", "d", "k", " ", "k", "i", "k", "n", "k", " ", "k", "2", "k", "0", "k", "1", "k", "6", "k"],
+      ids: [0, 33, 0, 6, 0, 7, 0, 19, 0, 12, 0, 22, 0, 17, 0, 13, 0, 26, 0, 29, 0, 3, 0, 19, 0, 9, 0, 26, 0, 8, 0, 19, 0, 20, 0, 22, 0, 4, 0, 29, 0, 5, 0, 7, 0, 5, 0, 19, 0, 18, 0, 29, 0, 19, 0, 28, 0, 23, 0, 15, 0, 36, 0],
+      decoded: "the company was founded in 2016",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["k", "t", "k", "h", "k", "i", "k", "s", "k", "i", "k", "s", "k", "a", "k", "t", "k", "e", "k", "s", "k", "t", "k"],
+      ids: [0, 33, 0, 6, 0, 18, 0, 8, 0, 18, 0, 8, 0, 26, 0, 33, 0, 7, 0, 8, 0, 33, 0],
+      decoded: "thisisatest",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: [],
+      ids: [],
+      decoded: "",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["k", "l", "k", "e", "k", "a", "k", "d", "k", "i", "k", "n", "k", "g", "k", " ", "k", "s", "k", "p", "k", "a", "k", "c", "k", "e", "k"],
+      ids: [0, 21, 0, 7, 0, 26, 0, 5, 0, 18, 0, 29, 0, 37, 0, 19, 0, 8, 0, 13, 0, 26, 0, 12, 0, 7, 0],
+      decoded: "leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["k", "t", "k", "r", "k", "a", "k", "i", "k", "l", "k", "i", "k", "n", "k", "g", "k", " ", "k", "s", "k", "p", "k", "a", "k", "c", "k", "e", "k"],
+      ids: [0, 33, 0, 25, 0, 26, 0, 18, 0, 21, 0, 18, 0, 29, 0, 37, 0, 19, 0, 8, 0, 13, 0, 26, 0, 12, 0, 7, 0],
+      decoded: "trailing space",
+    },
+    SURROUNDING_SPACE: {
+      text: BASE_TEST_STRINGS.SURROUNDING_SPACE,
+      tokens: ["k", "s", "k", "u", "k", "r", "k", "r", "k", "o", "k", "u", "k", "n", "k", "d", "k", "i", "k", "n", "k", "g", "k", " ", "k", "s", "k", "p", "k", "a", "k", "c", "k", "e", "k"],
+      ids: [0, 8, 0, 4, 0, 25, 0, 25, 0, 22, 0, 4, 0, 29, 0, 5, 0, 18, 0, 29, 0, 37, 0, 19, 0, 8, 0, 13, 0, 26, 0, 12, 0, 7, 0],
+      decoded: "surrounding space",
+    },
+    SPECIAL_CHARACTERS: {
+      text: VITS_TEST_STRINGS.SPECIAL_CHARACTERS,
+      tokens: [],
+      ids: [],
+      decoded: "",
+    },
+  },
+  "Xenova/mms-tts-ron": {
+    SPECIAL_CHARACTERS: {
+      text: VITS_TEST_STRINGS.SPECIAL_CHARACTERS,
+      tokens: ["c", "\u0163", "c", " ", "c", "\u0163", "c"],
+      ids: [0, 32, 0, 28, 0, 32, 0],
+      decoded: "\u0163 \u0163",
+    },
+  },
+};
diff --git a/tests/models/wav2vec2/tokenization.js b/tests/models/wav2vec2/tokenization.js
new file mode 100644
index 000000000..e6798f0b0
--- /dev/null
+++ b/tests/models/wav2vec2/tokenization.js
@@ -0,0 +1,472 @@
+import { Wav2Vec2CTCTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, BERT_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = Wav2Vec2CTCTokenizer;
+export const TEST_CONFIG = {
+  "Xenova/wav2vec2-base-960h": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["H", "o", "w", "|", "a", "r", "e", "|", "y", "o", "u", "|", "d", "o", "i", "n", "g", "?"],
+      ids: [11, 3, 3, 4, 3, 3, 3, 4, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3],
+      decoded: "H<unk> <unk> <unk> <unk>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["Y", "o", "u", "|", "s", "h", "o", "u", "l", "d", "'", "v", "e", "|", "d", "o", "n", "e", "|", "t", "h", "i", "s"],
+      ids: [22, 3, 3, 4, 3, 3, 3, 3, 3, 3, 27, 3, 3, 4, 3, 3, 3, 3, 4, 3, 3, 3, 3],
+      decoded: "Y<unk> <unk>'<unk> <unk> <unk>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "|", "0", "|", "1", "|", "2", "|", "3", "|", "4", "|", "5", "|", "6", "|", "7", "|", "8", "|", "9", "|", "1", "0", "|", "1", "0", "0", "|", "1", "0", "0", "0"],
+      ids: [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 3, 4, 3, 3, 3, 4, 3, 3, 3, 3],
+      decoded: "<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["T", "h", "e", "|", "c", "o", "m", "p", "a", "n", "y", "|", "w", "a", "s", "|", "f", "o", "u", "n", "d", "e", "d", "|", "i", "n", "|", "2", "0", "1", "6", "."],
+      ids: [6, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 4, 3, 3, 3, 3, 3],
+      decoded: "T<unk> <unk> <unk> <unk> <unk> <unk>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\n", "'", "l", "l", "|", "!", "!", "t", "o", "?", "'", "d", "'", "'", "d", "|", "o", "f", ",", "|", "c", "a", "n", "'", "t", "."],
+      ids: [7, 3, 27, 3, 3, 4, 3, 3, 3, 3, 3, 27, 3, 27, 27, 3, 4, 3, 3, 3, 4, 3, 3, 3, 27, 3, 3],
+      decoded: "A<unk>'<unk> <unk>'<unk>'<unk> <unk> <unk>'<unk>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["d", "e", "f", "|", "m", "a", "i", "n", "(", ")", ":", "\n", "\t", "p", "a", "s", "s"],
+      ids: [3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
+      decoded: "<unk> <unk>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["l", "e", "t", "|", "a", "|", "=", "|", "o", "b", "j", ".", "t", "o", "S", "t", "r", "i", "n", "g", "(", ")", ";", "\n", "t", "o", "S", "t", "r", "i", "n", "g", "(", ")", ";"],
+      ids: [3, 3, 3, 4, 3, 4, 3, 4, 3, 3, 3, 3, 3, 3, 12, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 12, 3, 3, 3, 3, 3, 3, 3, 3],
+      decoded: "<unk> <unk> <unk> <unk>S<unk>S<unk>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["T", "h", "i", "s", "\n", "\n", "i", "s", "\n", "a", "\n", "t", "e", "s", "t", "."],
+      ids: [6, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
+      decoded: "T<unk>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["U", "N", "w", "a", "n", "t", "\u00e9", "d", ",", "r", "u", "n", "n", "i", "n", "g"],
+      ids: [16, 9, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
+      decoded: "UN<unk>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0000", "2", "\ufffd", "3"],
+      ids: [3, 3, 3, 3, 3],
+      decoded: "<unk>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["H", "e", "l", "l", "o", "|", "W", "o", "r", "l", "d"],
+      ids: [11, 3, 3, 3, 3, 4, 18, 3, 3, 3, 3],
+      decoded: "H<unk> W<unk>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["h", "e", "l", "l", "o", "|", "w", "o", "r", "l", "d"],
+      ids: [3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3],
+      decoded: "<unk> <unk>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "\u6d3b", "\u7684", "\u771f", "\u8c1b", "\u662f"],
+      ids: [3, 3, 3, 3, 3, 3],
+      decoded: "<unk>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["|", "|", "|", "l", "e", "a", "d", "i", "n", "g", "|", "s", "p", "a", "c", "e"],
+      ids: [4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3],
+      decoded: "<unk> <unk>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["t", "r", "a", "i", "l", "i", "n", "g", "|", "s", "p", "a", "c", "e", "|", "|", "|"],
+      ids: [3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 4, 4, 4],
+      decoded: "<unk> <unk>",
+    },
+    SURROUNDING_SPACE: {
+      text: BASE_TEST_STRINGS.SURROUNDING_SPACE,
+      tokens: ["|", "|", "|", "s", "u", "r", "r", "o", "u", "n", "d", "i", "n", "g", "|", "s", "p", "a", "c", "e", "|", "|", "|"],
+      ids: [4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 4, 4, 4],
+      decoded: "<unk> <unk>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["H", "i", "|", "|", "H", "e", "l", "l", "o"],
+      ids: [11, 3, 4, 4, 11, 3, 3, 3, 3],
+      decoded: "H<unk> H<unk>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["t", "e", "s", "t", "|", "$", "1", "|", "R", "2", "|", "#", "3", "|", "\u20ac", "4", "|", "\u00a3", "5", "|", "\u00a5", "6", "|", "\u20a3", "7", "|", "\u20b9", "8", "|", "\u20b1", "9", "|", "t", "e", "s", "t"],
+      ids: [3, 3, 3, 3, 4, 3, 3, 4, 13, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 3, 3],
+      decoded: "<unk> <unk> R<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "|", "b", "o", "u", "g", "h", "t", "|", "a", "n", "|", "a", "p", "p", "l", "e", "|", "f", "o", "r", "|", "$", "1", ".", "0", "0", "|", "a", "t", "|", "t", "h", "e", "|", "s", "t", "o", "r", "e", "."],
+      ids: [10, 4, 3, 3, 3, 3, 3, 3, 4, 3, 3, 4, 3, 3, 3, 3, 3, 4, 3, 3, 3, 4, 3, 3, 3, 3, 3, 4, 3, 3, 4, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3],
+      decoded: "I <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["y", "o", "u", "\u2026", "|", "|"],
+      ids: [3, 3, 3, 3, 4, 4],
+      decoded: "<unk>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["y", "o", "u", "\u2026", "\u00a0", "\u00a0"],
+      ids: [3, 3, 3, 3, 3, 3],
+      decoded: "<unk>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["y", "o", "u", "\u2026", "\u00a0", "\u00a0", "y", "o", "u", "\u2026", "\u00a0", "\u00a0"],
+      ids: [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
+      decoded: "<unk>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["w", "e", "i", "r", "d", "|", "\uff5e", "|", "e", "d", "g", "e", "|", "\uff5e", "|", "c", "a", "s", "e"],
+      ids: [3, 3, 3, 3, 3, 4, 3, 4, 3, 3, 3, 3, 4, 3, 4, 3, 3, 3, 3],
+      decoded: "<unk> <unk> <unk> <unk> <unk>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581", "T", "h", "i", "s", "|", "\u2581", "i", "s", "|", "\u2581", "a", "|", "\u2581", "t", "e", "s", "t", "|", "\u2581", "."],
+      ids: [3, 6, 3, 3, 3, 4, 3, 3, 3, 4, 3, 3, 4, 3, 3, 3, 3, 3, 4, 3, 3],
+      decoded: "<unk>T<unk> <unk> <unk> <unk> <unk>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\ud83d\ude02", "|", "\ud83d\udc4d", "|", "\ud83e\udd23", "|", "\ud83d\ude0d", "|", "\ud83d\ude2d", "|", "\ud83c\udf89", "|", "\ud83d\ude4f", "|", "\ud83d\ude0a", "|", "\ud83d\udd25", "|", "\ud83d\ude01", "|", "\ud83d\ude05", "|", "\ud83e\udd17", "|", "\ud83d\ude06", "|", "\ud83d\udc4f", "|", "\u2764", "\ufe0f", "|", "\ud83d\udc9c", "|", "\ud83d\udc9a", "|", "\ud83d\udc97", "|", "\ud83d\udc99", "|", "\ud83d\udda4", "|", "\ud83d\ude0e", "|", "\ud83d\udc4c", "|", "\ud83e\udd73", "|", "\ud83d\udcaa", "|", "\u2728", "|", "\ud83d\udc49", "|", "\ud83d\udc40", "|", "\ud83d\udcaf", "|", "\ud83c\udf88", "|", "\ud83d\ude48", "|", "\ud83d\ude4c", "|", "\ud83d\udc80", "|", "\ud83d\udc47", "|", "\ud83d\udc4b", "|", "\u2705", "|", "\ud83c\udf81", "|", "\ud83c\udf1e", "|", "\ud83c\udf38", "|", "\ud83d\udcb0"],
+      ids: [3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3],
+      decoded: "<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2728", "|", "\ud83e\udd17", "|", "\ud83d\udc41", "\ufe0f", "|", "\ud83d\udc71", "\ud83c\udffb", "|", "\ud83d\udd75", "\u200d", "\u2642", "\ufe0f", "|", "\ud83e\uddd9", "\ud83c\udffb", "\u200d", "\u2642", "|", "\ud83d\udc68", "\ud83c\udffb", "\u200d", "\ud83c\udf3e", "|", "\ud83e\uddd1", "\u200d", "\ud83e\udd1d", "\u200d", "\ud83e\uddd1", "|", "\ud83d\udc69", "\u200d", "\u2764", "\u200d", "\ud83d\udc8b", "\u200d", "\ud83d\udc68", "|", "\ud83d\udc69", "\u200d", "\ud83d\udc69", "\u200d", "\ud83d\udc67", "\u200d", "\ud83d\udc66", "|", "\ud83e\uddd1", "\ud83c\udffb", "\u200d", "\ud83e\udd1d", "\u200d", "\ud83e\uddd1", "\ud83c\udffb", "|", "\ud83c\udff4", "\udb40\udc67", "\udb40\udc62", "\udb40\udc65", "\udb40\udc6e", "\udb40\udc67", "\udb40\udc7f", "|", "\ud83d\udc68", "\ud83c\udffb", "\u200d", "\u2764", "\ufe0f", "\u200d", "\ud83d\udc8b", "\u200d", "\ud83d\udc68", "\ud83c\udffc"],
+      ids: [3, 4, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 3, 3, 4, 3, 3, 3, 3, 4, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
+      decoded: "<unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk>",
+    },
+    ONLY_WHITESPACE: {
+      text: BASE_TEST_STRINGS.ONLY_WHITESPACE,
+      tokens: ["|", "\t", "\n"],
+      ids: [4, 3, 3],
+      decoded: "<unk>",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["a", "h", "\u535a", "\u63a8", "z", "z"],
+      ids: [3, 3, 3, 3, 3, 3],
+      decoded: "<unk>",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["H", "\u00e9", "l", "l", "o"],
+      ids: [11, 3, 3, 3, 3],
+      decoded: "H<unk>",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["|", "\t", "H", "e", "L", "L", "o", "!", "h", "o", "w", "|", "|", "\n", "|", "A", "r", "e", "|", "y", "o", "U", "?", "|", "|"],
+      ids: [4, 3, 11, 3, 15, 15, 3, 3, 3, 3, 3, 4, 4, 3, 4, 7, 3, 3, 4, 3, 3, 16, 3, 4, 4],
+      decoded: "<unk>H<unk>L<unk> <unk> A<unk> <unk>U<unk>",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["|", "\t", "H", "\u00e4", "L", "L", "o", "!", "h", "o", "w", "|", "|", "\n", "|", "A", "r", "e", "|", "y", "o", "U", "?", "|", "|"],
+      ids: [4, 3, 11, 3, 15, 15, 3, 3, 3, 3, 3, 4, 4, 3, 4, 7, 3, 3, 4, 3, 3, 16, 3, 4, 4],
+      decoded: "<unk>H<unk>L<unk> <unk> A<unk> <unk>U<unk>",
+    },
+  },
+  "Xenova/wav2vec2-large-xlsr-53-english": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["H", "o", "w", "|", "a", "r", "e", "|", "y", "o", "u", "|", "d", "o", "i", "n", "g", "?"],
+      ids: [3, 21, 29, 4, 7, 24, 11, 4, 31, 21, 27, 4, 10, 21, 15, 20, 13, 3],
+      decoded: "<unk>ow are you doing<unk>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["Y", "o", "u", "|", "s", "h", "o", "u", "l", "d", "'", "v", "e", "|", "d", "o", "n", "e", "|", "t", "h", "i", "s"],
+      ids: [3, 21, 27, 4, 25, 14, 21, 27, 18, 10, 5, 28, 11, 4, 10, 21, 20, 11, 4, 26, 14, 15, 25],
+      decoded: "<unk>ou should've done this",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["T", "h", "e", "|", "c", "o", "m", "p", "a", "n", "y", "|", "w", "a", "s", "|", "f", "o", "u", "n", "d", "e", "d", "|", "i", "n", "|", "2", "0", "1", "6", "."],
+      ids: [3, 14, 11, 4, 9, 21, 19, 22, 7, 20, 31, 4, 29, 7, 25, 4, 12, 21, 27, 20, 10, 11, 10, 4, 15, 20, 4, 3, 3, 3, 3, 3],
+      decoded: "<unk>he company was founded in <unk>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\n", "'", "l", "l", "|", "!", "!", "t", "o", "?", "'", "d", "'", "'", "d", "|", "o", "f", ",", "|", "c", "a", "n", "'", "t", "."],
+      ids: [3, 3, 5, 18, 18, 4, 3, 3, 26, 21, 3, 5, 10, 5, 5, 10, 4, 21, 12, 3, 4, 9, 7, 20, 5, 26, 3],
+      decoded: "<unk>'l <unk>to<unk>'d'd of<unk> can't<unk>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["d", "e", "f", "|", "m", "a", "i", "n", "(", ")", ":", "\n", "\t", "p", "a", "s", "s"],
+      ids: [10, 11, 12, 4, 19, 7, 15, 20, 3, 3, 3, 3, 3, 22, 7, 25, 25],
+      decoded: "def main<unk>pas",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["l", "e", "t", "|", "a", "|", "=", "|", "o", "b", "j", ".", "t", "o", "S", "t", "r", "i", "n", "g", "(", ")", ";", "\n", "t", "o", "S", "t", "r", "i", "n", "g", "(", ")", ";"],
+      ids: [18, 11, 26, 4, 7, 4, 3, 4, 21, 8, 16, 3, 26, 21, 3, 26, 24, 15, 20, 13, 3, 3, 3, 3, 26, 21, 3, 26, 24, 15, 20, 13, 3, 3, 3],
+      decoded: "let a <unk> obj<unk>to<unk>tring<unk>to<unk>tring<unk>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["T", "h", "i", "s", "\n", "\n", "i", "s", "\n", "a", "\n", "t", "e", "s", "t", "."],
+      ids: [3, 14, 15, 25, 3, 3, 15, 25, 3, 7, 3, 26, 11, 25, 26, 3],
+      decoded: "<unk>his<unk>is<unk>a<unk>test<unk>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["U", "N", "w", "a", "n", "t", "\u00e9", "d", ",", "r", "u", "n", "n", "i", "n", "g"],
+      ids: [3, 3, 29, 7, 20, 26, 3, 10, 3, 24, 27, 20, 20, 15, 20, 13],
+      decoded: "<unk>want<unk>d<unk>runing",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["H", "e", "l", "l", "o", "|", "W", "o", "r", "l", "d"],
+      ids: [3, 11, 18, 18, 21, 4, 3, 21, 24, 18, 10],
+      decoded: "<unk>elo <unk>orld",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["h", "e", "l", "l", "o", "|", "w", "o", "r", "l", "d"],
+      ids: [14, 11, 18, 18, 21, 4, 29, 21, 24, 18, 10],
+      decoded: "helo world",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["|", "|", "|", "l", "e", "a", "d", "i", "n", "g", "|", "s", "p", "a", "c", "e"],
+      ids: [4, 4, 4, 18, 11, 7, 10, 15, 20, 13, 4, 25, 22, 7, 9, 11],
+      decoded: "leading space",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["t", "r", "a", "i", "l", "i", "n", "g", "|", "s", "p", "a", "c", "e", "|", "|", "|"],
+      ids: [26, 24, 7, 15, 18, 15, 20, 13, 4, 25, 22, 7, 9, 11, 4, 4, 4],
+      decoded: "trailing space",
+    },
+    SURROUNDING_SPACE: {
+      text: BASE_TEST_STRINGS.SURROUNDING_SPACE,
+      tokens: ["|", "|", "|", "s", "u", "r", "r", "o", "u", "n", "d", "i", "n", "g", "|", "s", "p", "a", "c", "e", "|", "|", "|"],
+      ids: [4, 4, 4, 25, 27, 24, 24, 21, 27, 20, 10, 15, 20, 13, 4, 25, 22, 7, 9, 11, 4, 4, 4],
+      decoded: "surounding space",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["H", "i", "|", "|", "H", "e", "l", "l", "o"],
+      ids: [3, 15, 4, 4, 3, 11, 18, 18, 21],
+      decoded: "<unk>i <unk>elo",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["t", "e", "s", "t", "|", "$", "1", "|", "R", "2", "|", "#", "3", "|", "\u20ac", "4", "|", "\u00a3", "5", "|", "\u00a5", "6", "|", "\u20a3", "7", "|", "\u20b9", "8", "|", "\u20b1", "9", "|", "t", "e", "s", "t"],
+      ids: [26, 11, 25, 26, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 26, 11, 25, 26],
+      decoded: "test <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> <unk> test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "|", "b", "o", "u", "g", "h", "t", "|", "a", "n", "|", "a", "p", "p", "l", "e", "|", "f", "o", "r", "|", "$", "1", ".", "0", "0", "|", "a", "t", "|", "t", "h", "e", "|", "s", "t", "o", "r", "e", "."],
+      ids: [3, 4, 8, 21, 27, 13, 14, 26, 4, 7, 20, 4, 7, 22, 22, 18, 11, 4, 12, 21, 24, 4, 3, 3, 3, 3, 3, 4, 7, 26, 4, 26, 14, 11, 4, 25, 26, 21, 24, 11, 3],
+      decoded: "<unk> bought an aple for <unk> at the store<unk>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["y", "o", "u", "\u2026", "|", "|"],
+      ids: [31, 21, 27, 3, 4, 4],
+      decoded: "you<unk>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["y", "o", "u", "\u2026", "\u00a0", "\u00a0"],
+      ids: [31, 21, 27, 3, 3, 3],
+      decoded: "you<unk>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["y", "o", "u", "\u2026", "\u00a0", "\u00a0", "y", "o", "u", "\u2026", "\u00a0", "\u00a0"],
+      ids: [31, 21, 27, 3, 3, 3, 31, 21, 27, 3, 3, 3],
+      decoded: "you<unk>you<unk>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["w", "e", "i", "r", "d", "|", "\uff5e", "|", "e", "d", "g", "e", "|", "\uff5e", "|", "c", "a", "s", "e"],
+      ids: [29, 11, 15, 24, 10, 4, 3, 4, 11, 10, 13, 11, 4, 3, 4, 9, 7, 25, 11],
+      decoded: "weird <unk> edge <unk> case",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581", "T", "h", "i", "s", "|", "\u2581", "i", "s", "|", "\u2581", "a", "|", "\u2581", "t", "e", "s", "t", "|", "\u2581", "."],
+      ids: [3, 3, 14, 15, 25, 4, 3, 15, 25, 4, 3, 7, 4, 3, 26, 11, 25, 26, 4, 3, 3],
+      decoded: "<unk>his <unk>is <unk>a <unk>test <unk>",
+    },
+    CHINESE_LATIN_MIXED: {
+      text: BERT_TEST_STRINGS.CHINESE_LATIN_MIXED,
+      tokens: ["a", "h", "\u535a", "\u63a8", "z", "z"],
+      ids: [7, 14, 3, 3, 32, 32],
+      decoded: "ah<unk>z",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["H", "\u00e9", "l", "l", "o"],
+      ids: [3, 3, 18, 18, 21],
+      decoded: "<unk>lo",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["|", "\t", "H", "e", "L", "L", "o", "!", "h", "o", "w", "|", "|", "\n", "|", "A", "r", "e", "|", "y", "o", "U", "?", "|", "|"],
+      ids: [4, 3, 3, 11, 3, 3, 21, 3, 14, 21, 29, 4, 4, 3, 4, 3, 24, 11, 4, 31, 21, 3, 3, 4, 4],
+      decoded: "<unk>e<unk>o<unk>how <unk> <unk>re yo<unk>",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["|", "\t", "H", "\u00e4", "L", "L", "o", "!", "h", "o", "w", "|", "|", "\n", "|", "A", "r", "e", "|", "y", "o", "U", "?", "|", "|"],
+      ids: [4, 3, 3, 3, 3, 3, 21, 3, 14, 21, 29, 4, 4, 3, 4, 3, 24, 11, 4, 31, 21, 3, 3, 4, 4],
+      decoded: "<unk>o<unk>how <unk> <unk>re yo<unk>",
+    },
+  },
+  "Xenova/mms-1b-all": {
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "|", "0", "|", "1", "|", "2", "|", "3", "|", "4", "|", "5", "|", "6", "|", "7", "|", "8", "|", "9", "|", "1", "0", "|", "1", "0", "0", "|", "1", "0", "0", "0"],
+      ids: [27, 30, 35, 41, 39, 38, 40, 43, 42, 36, 4, 27, 4, 30, 4, 35, 4, 41, 4, 39, 4, 38, 4, 40, 4, 43, 4, 42, 4, 36, 4, 30, 27, 4, 30, 27, 27, 4, 30, 27, 27, 27],
+      decoded: "0123456789 0 1 2 3 4 5 6 7 8 9 10 10 10",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["T", "h", "e", "|", "c", "o", "m", "p", "a", "n", "y", "|", "w", "a", "s", "|", "f", "o", "u", "n", "d", "e", "d", "|", "i", "n", "|", "2", "0", "1", "6", "."],
+      ids: [3, 13, 5, 4, 16, 8, 18, 20, 7, 10, 22, 4, 23, 7, 11, 4, 19, 8, 17, 10, 15, 5, 15, 4, 9, 10, 4, 35, 27, 30, 40, 37],
+      decoded: "<unk>he company was founded in 2016.",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\n", "'", "l", "l", "|", "!", "!", "t", "o", "?", "'", "d", "'", "'", "d", "|", "o", "f", ",", "|", "c", "a", "n", "'", "t", "."],
+      ids: [3, 3, 31, 14, 14, 4, 75, 75, 6, 8, 3, 31, 15, 31, 31, 15, 4, 8, 19, 44, 4, 16, 7, 10, 31, 6, 37],
+      decoded: "<unk>'l!to<unk>'d'd of, can't.",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["d", "e", "f", "|", "m", "a", "i", "n", "(", ")", ":", "\n", "\t", "p", "a", "s", "s"],
+      ids: [15, 5, 19, 4, 18, 7, 9, 10, 3, 3, 46, 3, 3, 20, 7, 11, 11],
+      decoded: "def main<unk>:<unk>pas",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["l", "e", "t", "|", "a", "|", "=", "|", "o", "b", "j", ".", "t", "o", "S", "t", "r", "i", "n", "g", "(", ")", ";", "\n", "t", "o", "S", "t", "r", "i", "n", "g", "(", ")", ";"],
+      ids: [14, 5, 6, 4, 7, 4, 3, 4, 8, 24, 29, 37, 6, 8, 3, 6, 12, 9, 10, 21, 3, 3, 52, 3, 6, 8, 3, 6, 12, 9, 10, 21, 3, 3, 52],
+      decoded: "let a <unk> obj.to<unk>tring<unk>;<unk>to<unk>tring<unk>;",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["T", "h", "i", "s", "\n", "\n", "i", "s", "\n", "a", "\n", "t", "e", "s", "t", "."],
+      ids: [3, 13, 9, 11, 3, 3, 9, 11, 3, 7, 3, 6, 5, 11, 6, 37],
+      decoded: "<unk>his<unk>is<unk>a<unk>test.",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["U", "N", "w", "a", "n", "t", "\u00e9", "d", ",", "r", "u", "n", "n", "i", "n", "g"],
+      ids: [3, 3, 23, 7, 10, 6, 55, 15, 44, 12, 17, 10, 10, 9, 10, 21],
+      decoded: "<unk>want\u00e9d,runing",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0000", "2", "\ufffd", "3"],
+      ids: [30, 3, 35, 3, 41],
+      decoded: "1<unk>2<unk>3",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u751f", "\u6d3b", "\u7684", "\u771f", "\u8c1b", "\u662f"],
+      ids: [136, 3, 3, 3, 3, 3],
+      decoded: "\u751f<unk>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["t", "e", "s", "t", "|", "$", "1", "|", "R", "2", "|", "#", "3", "|", "\u20ac", "4", "|", "\u00a3", "5", "|", "\u00a5", "6", "|", "\u20a3", "7", "|", "\u20b9", "8", "|", "\u20b1", "9", "|", "t", "e", "s", "t"],
+      ids: [6, 5, 11, 6, 4, 48, 30, 4, 3, 35, 4, 3, 41, 4, 3, 39, 4, 68, 38, 4, 53, 40, 4, 3, 43, 4, 3, 42, 4, 3, 36, 4, 6, 5, 11, 6],
+      decoded: "test $1 <unk>2 <unk>3 <unk>4 \u00a35 \u00a56 <unk>7 <unk>8 <unk>9 test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "|", "b", "o", "u", "g", "h", "t", "|", "a", "n", "|", "a", "p", "p", "l", "e", "|", "f", "o", "r", "|", "$", "1", ".", "0", "0", "|", "a", "t", "|", "t", "h", "e", "|", "s", "t", "o", "r", "e", "."],
+      ids: [3, 4, 24, 8, 17, 21, 13, 6, 4, 7, 10, 4, 7, 20, 20, 14, 5, 4, 19, 8, 12, 4, 48, 30, 37, 27, 27, 4, 7, 6, 4, 6, 13, 5, 4, 11, 6, 8, 12, 5, 37],
+      decoded: "<unk> bought an aple for $1.0 at the store.",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581", "T", "h", "i", "s", "|", "\u2581", "i", "s", "|", "\u2581", "a", "|", "\u2581", "t", "e", "s", "t", "|", "\u2581", "."],
+      ids: [3, 3, 13, 9, 11, 4, 3, 9, 11, 4, 3, 7, 4, 3, 6, 5, 11, 6, 4, 3, 37],
+      decoded: "<unk>his <unk>is <unk>a <unk>test <unk>.",
+    },
+    SIMPLE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.SIMPLE_WITH_ACCENTS,
+      tokens: ["H", "\u00e9", "l", "l", "o"],
+      ids: [3, 55, 14, 14, 8],
+      decoded: "<unk>\u00e9lo",
+    },
+    MIXED_CASE_WITHOUT_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITHOUT_ACCENTS,
+      tokens: ["|", "\t", "H", "e", "L", "L", "o", "!", "h", "o", "w", "|", "|", "\n", "|", "A", "r", "e", "|", "y", "o", "U", "?", "|", "|"],
+      ids: [4, 3, 3, 5, 3, 3, 8, 75, 13, 8, 23, 4, 4, 3, 4, 3, 12, 5, 4, 22, 8, 3, 3, 4, 4],
+      decoded: "<unk>e<unk>o!how <unk> <unk>re yo<unk>",
+    },
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["|", "\t", "H", "\u00e4", "L", "L", "o", "!", "h", "o", "w", "|", "|", "\n", "|", "A", "r", "e", "|", "y", "o", "U", "?", "|", "|"],
+      ids: [4, 3, 3, 78, 3, 3, 8, 75, 13, 8, 23, 4, 4, 3, 4, 3, 12, 5, 4, 22, 8, 3, 3, 4, 4],
+      decoded: "<unk>\u00e4<unk>o!how <unk> <unk>re yo<unk>",
+    },
+  },
+  "Xenova/mms-1b-fl102": {
+    MIXED_CASE_WITH_ACCENTS: {
+      text: BERT_TEST_STRINGS.MIXED_CASE_WITH_ACCENTS,
+      tokens: ["|", "\t", "H", "\u00e4", "L", "L", "o", "!", "h", "o", "w", "|", "|", "\n", "|", "A", "r", "e", "|", "y", "o", "U", "?", "|", "|"],
+      ids: [4, 3, 3, 3, 3, 3, 8, 75, 13, 8, 23, 4, 4, 3, 4, 3, 12, 5, 4, 22, 8, 3, 3, 4, 4],
+      decoded: "<unk>o!how <unk> <unk>re yo<unk>",
+    },
+  },
+  "Xenova/mms-1b-l1107": {
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "|", "0", "|", "1", "|", "2", "|", "3", "|", "4", "|", "5", "|", "6", "|", "7", "|", "8", "|", "9", "|", "1", "0", "|", "1", "0", "0", "|", "1", "0", "0", "0"],
+      ids: [34, 36, 37, 42, 38, 41, 39, 3, 3, 3, 4, 34, 4, 36, 4, 37, 4, 42, 4, 38, 4, 41, 4, 39, 4, 3, 4, 3, 4, 3, 4, 36, 34, 4, 36, 34, 34, 4, 36, 34, 34, 34],
+      decoded: "0123456<unk> 0 1 2 3 4 5 6 <unk> <unk> <unk> 10 10 10",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["T", "h", "e", "|", "c", "o", "m", "p", "a", "n", "y", "|", "w", "a", "s", "|", "f", "o", "u", "n", "d", "e", "d", "|", "i", "n", "|", "2", "0", "1", "6", "."],
+      ids: [3, 9, 5, 4, 21, 7, 18, 24, 8, 10, 20, 4, 17, 8, 12, 4, 19, 7, 16, 10, 14, 5, 14, 4, 11, 10, 4, 37, 34, 36, 39, 3],
+      decoded: "<unk>he company was founded in 2016<unk>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["t", "e", "s", "t", "|", "$", "1", "|", "R", "2", "|", "#", "3", "|", "\u20ac", "4", "|", "\u00a3", "5", "|", "\u00a5", "6", "|", "\u20a3", "7", "|", "\u20b9", "8", "|", "\u20b1", "9", "|", "t", "e", "s", "t"],
+      ids: [6, 5, 12, 6, 4, 3, 36, 4, 3, 37, 4, 3, 42, 4, 3, 38, 4, 3, 41, 4, 3, 39, 4, 3, 3, 4, 3, 3, 4, 3, 3, 4, 6, 5, 12, 6],
+      decoded: "test <unk>1 <unk>2 <unk>3 <unk>4 <unk>5 <unk>6 <unk> <unk> <unk> test",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "|", "b", "o", "u", "g", "h", "t", "|", "a", "n", "|", "a", "p", "p", "l", "e", "|", "f", "o", "r", "|", "$", "1", ".", "0", "0", "|", "a", "t", "|", "t", "h", "e", "|", "s", "t", "o", "r", "e", "."],
+      ids: [3, 4, 23, 7, 16, 22, 9, 6, 4, 8, 10, 4, 8, 24, 24, 15, 5, 4, 19, 7, 13, 4, 3, 36, 3, 34, 34, 4, 8, 6, 4, 6, 9, 5, 4, 12, 6, 7, 13, 5, 3],
+      decoded: "<unk> bought an aple for <unk>1<unk>0 at the store<unk>",
+    },
+  },
+};
diff --git a/tests/models/whisper/tokenization.js b/tests/models/whisper/tokenization.js
new file mode 100644
index 000000000..bb3b1c685
--- /dev/null
+++ b/tests/models/whisper/tokenization.js
@@ -0,0 +1,778 @@
+import { WhisperTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS, WHISPER_TEST_STRINGS } from "../test_strings.js";
+import { compare } from "../../test_utils.js";
+
+export const TOKENIZER_CLASS = WhisperTokenizer;
+export const TEST_CONFIG = {
+  "onnx-community/whisper-tiny.en": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u0120are", "\u0120you", "\u0120doing", "?"],
+      ids: [50257, 50362, 2437, 389, 345, 1804, 30, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>How are you doing?<|endoftext|>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120should", "'ve", "\u0120done", "\u0120this"],
+      ids: [50257, 50362, 1639, 815, 1053, 1760, 428, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>You should've done this<|endoftext|>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["01", "23", "45", "67", "89", "\u01200", "\u01201", "\u01202", "\u01203", "\u01204", "\u01205", "\u01206", "\u01207", "\u01208", "\u01209", "\u012010", "\u0120100", "\u01201000"],
+      ids: [50257, 50362, 486, 1954, 2231, 3134, 4531, 657, 352, 362, 513, 604, 642, 718, 767, 807, 860, 838, 1802, 8576, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000<|endoftext|>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u01202016", "."],
+      ids: [50257, 50362, 464, 1664, 373, 9393, 287, 1584, 13, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>The company was founded in 2016.<|endoftext|>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [50257, 50362, 32, 198, 1183, 37867, 1462, 8348, 67, 7061, 67, 286, 11, 460, 470, 13, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>A\n'll!!to?'d''d of, can't.<|endoftext|>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "():", "\u010a", "\u0109", "pass"],
+      ids: [50257, 50362, 4299, 1388, 33529, 198, 197, 6603, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>def main():\n\tpass<|endoftext|>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120obj", ".", "to", "String", "();", "\u010a", "to", "String", "();"],
+      ids: [50257, 50362, 1616, 257, 796, 26181, 13, 1462, 10100, 9783, 198, 1462, 10100, 9783, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>let a = obj.toString();\ntoString();<|endoftext|>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["This", "\u010a", "\u010a", "is", "\u010a", "a", "\u010a", "test", "."],
+      ids: [50257, 50362, 1212, 198, 198, 271, 198, 64, 198, 9288, 13, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>This\n\nis\na\ntest.<|endoftext|>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "want", "\u00c3\u00a9", "d", ",", "running"],
+      ids: [50257, 50362, 4944, 42949, 2634, 67, 11, 20270, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>UNwant\u00e9d,running<|endoftext|>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0100", "2", "\u00ef\u00bf\u00bd", "3"],
+      ids: [50257, 50362, 16, 188, 17, 4210, 18, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>1\u00002\ufffd3<|endoftext|>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "\u0120World"],
+      ids: [50257, 50362, 15496, 2159, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>Hello World<|endoftext|>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["hello", "\u0120world"],
+      ids: [50257, 50362, 31373, 995, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>hello world<|endoftext|>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141", "\u00e6", "\u00b4", "\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e", "\u0141", "\u00e8", "\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [50257, 50362, 37955, 162, 112, 119, 21410, 40367, 253, 164, 108, 249, 42468, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>\u751f\u6d3b\u7684\u771f\u8c1b\u662f<|endoftext|>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120", "\u0120", "\u0120leading", "\u0120space"],
+      ids: [50257, 50362, 220, 220, 3756, 2272, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>   leading space<|endoftext|>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "iling", "\u0120space", "\u0120", "\u0120", "\u0120"],
+      ids: [50257, 50362, 9535, 4386, 2272, 220, 220, 220, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>trailing space   <|endoftext|>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u0120", "\u0120Hello"],
+      ids: [50257, 50362, 17250, 220, 18435, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>Hi  Hello<|endoftext|>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2", "\u0124", "\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [50257, 50362, 9288, 720, 16, 371, 17, 1303, 18, 10432, 19, 4248, 20, 38221, 21, 2343, 224, 96, 22, 2343, 224, 117, 23, 2343, 224, 109, 24, 1332, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test<|endoftext|>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "00", "\u0120at", "\u0120the", "\u0120store", "."],
+      ids: [50257, 50362, 40, 5839, 281, 17180, 329, 720, 16, 13, 405, 379, 262, 3650, 13, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>I bought an apple for $1.00 at the store.<|endoftext|>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120", "\u0120"],
+      ids: [50257, 50362, 5832, 1399, 220, 220, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>you\u2026  <|endoftext|>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [50257, 50362, 5832, 1399, 4603, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>you\u2026\u00a0\u00a0<|endoftext|>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2\u0142", "\u00c2\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2\u0142\u00c2\u0142"],
+      ids: [50257, 50362, 5832, 1399, 1849, 1849, 5832, 1399, 4603, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0<|endoftext|>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120edge", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120case"],
+      ids: [50257, 50362, 732, 1447, 27332, 121, 252, 5743, 27332, 121, 252, 1339, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>weird \uff5e edge \uff5e case<|endoftext|>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120\u00e2\u0138", "\u0123", "is", "\u0120\u00e2\u0138", "\u0123", "a", "\u0120\u00e2\u0138", "\u0123", "test", "\u0120\u00e2\u0138", "\u0123", "."],
+      ids: [50257, 50362, 5008, 223, 1212, 11019, 223, 271, 11019, 223, 64, 11019, 223, 9288, 11019, 223, 13, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>\u2581This \u2581is \u2581a \u2581test \u2581.<|endoftext|>",
+    },
+    SPECIAL_TOKENS: {
+      text: WHISPER_TEST_STRINGS.SPECIAL_TOKENS,
+      tokens: ["\u0120", "\u0120", "\u0120", "<|startoftranscript|>", "\u0120", "<|en|>", "\u0120", "\u0120", "\u0120"],
+      ids: [50257, 50362, 220, 220, 220, 50257, 220, 50258, 220, 220, 220, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|>   <|startoftranscript|> <|en|>   <|endoftext|>",
+    },
+  },
+  "distil-whisper/distil-large-v3": {
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "12", "3", "45", "67", "89", "\u01200", "\u01201", "\u01202", "\u01203", "\u01204", "\u01205", "\u01206", "\u01207", "\u01208", "\u01209", "\u012010", "\u0120100", "\u01201000"],
+      ids: [50258, 50364, 15, 4762, 18, 8465, 22452, 21115, 1958, 502, 568, 805, 1017, 1025, 1386, 1614, 1649, 1722, 1266, 2319, 9714, 50257],
+      decoded: "<|startoftranscript|><|notimestamps|>0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000<|endoftext|>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "(", "):", "\u010a", "\u0109", "pass"],
+      ids: [50258, 50364, 20595, 2135, 7, 4507, 198, 197, 9216, 50257],
+      decoded: "<|startoftranscript|><|notimestamps|>def main():\n\tpass<|endoftext|>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120ob", "j", ".", "to", "St", "ring", "(", ");", "\u010a", "to", "St", "ring", "(", ");"],
+      ids: [50258, 50364, 2631, 257, 6585, 1111, 73, 13, 1353, 4520, 2937, 7, 34446, 198, 1353, 4520, 2937, 7, 34446, 50257],
+      decoded: "<|startoftranscript|><|notimestamps|>let a = obj.toString();\ntoString();<|endoftext|>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "w", "ant", "\u00c3\u00a9d", ",", "running"],
+      ids: [50258, 50364, 3979, 86, 394, 7811, 11, 45482, 50257],
+      decoded: "<|startoftranscript|><|notimestamps|>UNwant\u00e9d,running<|endoftext|>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["he", "llo", "\u0120world"],
+      ids: [50258, 50364, 675, 1913, 1002, 50257],
+      decoded: "<|startoftranscript|><|notimestamps|>hello world<|endoftext|>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141\u00e6\u00b4\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [50258, 50364, 49958, 1546, 6303, 8897, 249, 1541, 50257],
+      decoded: "<|startoftranscript|><|notimestamps|>\u751f\u6d3b\u7684\u771f\u8c1b\u662f<|endoftext|>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2", "\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2", "\u0124", "\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120test"],
+      ids: [50258, 50364, 31636, 1848, 16, 497, 17, 3536, 18, 17450, 19, 14378, 20, 1815, 98, 21, 672, 224, 96, 22, 672, 224, 117, 23, 672, 224, 109, 24, 1500, 50257],
+      decoded: "<|startoftranscript|><|notimestamps|>test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test<|endoftext|>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142"],
+      ids: [50258, 50364, 5616, 1260, 126, 254, 126, 254, 50257],
+      decoded: "<|startoftranscript|><|notimestamps|>you\u2026\u00a0\u00a0<|endoftext|>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142"],
+      ids: [50258, 50364, 5616, 1260, 126, 254, 126, 254, 5616, 1260, 126, 254, 126, 254, 50257],
+      decoded: "<|startoftranscript|><|notimestamps|>you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0<|endoftext|>",
+    },
+  },
+  "distil-whisper/distil-large-v2": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["How", "\u0120are", "\u0120you", "\u0120doing", "?"],
+      ids: [50258, 50259, 50359, 50363, 6462, 366, 291, 884, 30, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>How are you doing?<|endoftext|>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["You", "\u0120should", "'ve", "\u0120done", "\u0120", "this"],
+      ids: [50258, 50259, 50359, 50363, 3223, 820, 600, 1096, 220, 11176, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>You should've done this<|endoftext|>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["0", "12", "3", "45", "67", "89", "\u01200", "\u01201", "\u01202", "\u01203", "\u01204", "\u01205", "\u01206", "\u01207", "\u01208", "\u01209", "\u012010", "\u0120100", "\u01201000"],
+      ids: [50258, 50259, 50359, 50363, 15, 4762, 18, 8465, 22452, 21115, 1958, 502, 568, 805, 1017, 1025, 1386, 1614, 1649, 1722, 1266, 2319, 9714, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000<|endoftext|>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u01202016", "."],
+      ids: [50258, 50259, 50359, 50363, 2278, 2237, 390, 13234, 294, 6549, 13, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>The company was founded in 2016.<|endoftext|>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["A", "\u010a", "'ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [50258, 50259, 50359, 50363, 32, 198, 603, 15138, 1353, 8569, 67, 15025, 67, 295, 11, 393, 380, 13, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>A\n'll!!to?'d''d of, can't.<|endoftext|>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["def", "\u0120main", "(", "):", "\u010a", "\u0109", "pass"],
+      ids: [50258, 50259, 50359, 50363, 20595, 2135, 7, 4507, 198, 197, 9216, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>def main():\n\tpass<|endoftext|>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["let", "\u0120a", "\u0120=", "\u0120ob", "j", ".", "to", "St", "ring", "(", ");", "\u010a", "to", "St", "ring", "(", ");"],
+      ids: [50258, 50259, 50359, 50363, 2631, 257, 6585, 1111, 73, 13, 1353, 4520, 2937, 7, 34446, 198, 1353, 4520, 2937, 7, 34446, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>let a = obj.toString();\ntoString();<|endoftext|>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["This", "\u010a", "\u010a", "is", "\u010a", "a", "\u010a", "test", "."],
+      ids: [50258, 50259, 50359, 50363, 5723, 198, 198, 271, 198, 64, 198, 31636, 13, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>This\n\nis\na\ntest.<|endoftext|>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["UN", "w", "ant", "\u00c3\u00a9d", ",", "running"],
+      ids: [50258, 50259, 50359, 50363, 3979, 86, 394, 7811, 11, 45482, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>UNwant\u00e9d,running<|endoftext|>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["1", "\u0100", "2", "\u00ef\u00bf\u00bd", "3"],
+      ids: [50258, 50259, 50359, 50363, 16, 188, 17, 5342, 18, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>1\u00002\ufffd3<|endoftext|>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["Hello", "\u0120World"],
+      ids: [50258, 50259, 50359, 50363, 15947, 3937, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>Hello World<|endoftext|>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["he", "llo", "\u0120world"],
+      ids: [50258, 50259, 50359, 50363, 675, 1913, 1002, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>hello world<|endoftext|>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u00e7\u0136\u0141\u00e6\u00b4\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [50258, 50259, 50359, 50363, 49958, 1546, 6303, 8897, 249, 1541, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>\u751f\u6d3b\u7684\u771f\u8c1b\u662f<|endoftext|>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120", "\u0120", "\u0120leading", "\u0120space"],
+      ids: [50258, 50259, 50359, 50363, 220, 220, 5775, 1901, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>   leading space<|endoftext|>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["tra", "iling", "\u0120space", "\u0120", "\u0120", "\u0120"],
+      ids: [50258, 50259, 50359, 50363, 17227, 4883, 1901, 220, 220, 220, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>trailing space   <|endoftext|>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["Hi", "\u0120", "\u0120Hello"],
+      ids: [50258, 50259, 50359, 50363, 17155, 220, 2425, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>Hi  Hello<|endoftext|>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2", "\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2", "\u0124", "\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120", "test"],
+      ids: [50258, 50259, 50359, 50363, 31636, 1848, 16, 497, 17, 3536, 18, 17450, 19, 14378, 20, 1815, 98, 21, 672, 224, 96, 22, 672, 224, 117, 23, 672, 224, 109, 24, 220, 31636, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test<|endoftext|>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "00", "\u0120at", "\u0120", "the", "\u0120store", "."],
+      ids: [50258, 50259, 50359, 50363, 40, 4243, 364, 10606, 337, 1848, 16, 13, 628, 412, 220, 3322, 3531, 13, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>I bought an apple for $1.00 at the store.<|endoftext|>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u0120", "\u0120"],
+      ids: [50258, 50259, 50359, 50363, 5616, 1260, 220, 220, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>you\u2026  <|endoftext|>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142"],
+      ids: [50258, 50259, 50359, 50363, 5616, 1260, 126, 254, 126, 254, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>you\u2026\u00a0\u00a0<|endoftext|>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142"],
+      ids: [50258, 50259, 50359, 50363, 5616, 1260, 126, 254, 126, 254, 5616, 1260, 126, 254, 126, 254, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0<|endoftext|>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["we", "ird", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120edge", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120case"],
+      ids: [50258, 50259, 50359, 50363, 826, 1271, 25072, 121, 252, 4691, 25072, 121, 252, 1389, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>weird \uff5e edge \uff5e case<|endoftext|>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u00e2\u0138", "\u0123", "This", "\u0120\u00e2\u0138", "\u0123", "is", "\u0120\u00e2\u0138", "\u0123", "a", "\u0120\u00e2\u0138", "\u0123", "test", "\u0120\u00e2\u0138", "\u0123", "."],
+      ids: [50258, 50259, 50359, 50363, 39984, 223, 5723, 29405, 223, 271, 29405, 223, 64, 29405, 223, 31636, 29405, 223, 13, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>\u2581This \u2581is \u2581a \u2581test \u2581.<|endoftext|>",
+    },
+    SPECIAL_TOKENS: {
+      text: WHISPER_TEST_STRINGS.SPECIAL_TOKENS,
+      tokens: ["\u0120", "\u0120", "\u0120", "<|startoftranscript|>", "\u0120", "<|en|>", "\u0120", "\u0120", "\u0120"],
+      ids: [50258, 50259, 50359, 50363, 220, 220, 220, 50258, 220, 50259, 220, 220, 220, 50257],
+      decoded: "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>   <|startoftranscript|> <|en|>   <|endoftext|>",
+    },
+  },
+  "distil-whisper/distil-small.en": {
+    SPECIAL_TOKENS: {
+      text: WHISPER_TEST_STRINGS.SPECIAL_TOKENS,
+
+      // https://github.com/huggingface/transformers/issues/33371
+      // tokens: ["   <|startoftranscript|> ", "<|en|>   "],
+      tokens: ["<|startoftranscript|>", "<|en|>"],
+      ids: [50257, 50362, 50257, 50258, 50256],
+      decoded: "<|startoftranscript|><|notimestamps|><|startoftranscript|><|en|><|endoftext|>",
+    },
+  },
+  "Xenova/nb-whisper-tiny-beta": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["\u0120How", "\u0120are", "\u0120you", "\u0120doing", "?"],
+      ids: [50258, 50288, 50359, 50363, 1012, 366, 291, 884, 30, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> How are you doing?<|endoftext|>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["\u0120You", "\u0120should", "'ve", "\u0120done", "\u0120", "this"],
+      ids: [50258, 50288, 50359, 50363, 509, 820, 600, 1096, 220, 11176, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> You should've done this<|endoftext|>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["\u01200", "12", "3", "45", "67", "89", "\u01200", "\u01201", "\u01202", "\u01203", "\u01204", "\u01205", "\u01206", "\u01207", "\u01208", "\u01209", "\u012010", "\u0120100", "\u01201000"],
+      ids: [50258, 50288, 50359, 50363, 1958, 4762, 18, 8465, 22452, 21115, 1958, 502, 568, 805, 1017, 1025, 1386, 1614, 1649, 1722, 1266, 2319, 9714, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000<|endoftext|>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u0120The", "\u0120company", "\u0120was", "\u0120founded", "\u0120in", "\u01202016", "."],
+      ids: [50258, 50288, 50359, 50363, 440, 2237, 390, 13234, 294, 6549, 13, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> The company was founded in 2016.<|endoftext|>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u0120A", "\u010a", "'ll", "\u0120!!", "to", "?'", "d", "''", "d", "\u0120of", ",", "\u0120can", "'t", "."],
+      ids: [50258, 50288, 50359, 50363, 316, 198, 603, 15138, 1353, 8569, 67, 15025, 67, 295, 11, 393, 380, 13, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> A\n'll!!to?'d''d of, can't.<|endoftext|>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u0120def", "\u0120main", "(", "):", "\u010a", "\u0109", "pass"],
+      ids: [50258, 50288, 50359, 50363, 1060, 2135, 7, 4507, 198, 197, 9216, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> def main():\n\tpass<|endoftext|>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u0120let", "\u0120a", "\u0120=", "\u0120ob", "j", ".", "to", "St", "ring", "(", ");", "\u010a", "to", "St", "ring", "(", ");"],
+      ids: [50258, 50288, 50359, 50363, 718, 257, 6585, 1111, 73, 13, 1353, 4520, 2937, 7, 34446, 198, 1353, 4520, 2937, 7, 34446, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> let a = obj.toString();\ntoString();<|endoftext|>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["\u0120This", "\u010a", "\u010a", "is", "\u010a", "a", "\u010a", "test", "."],
+      ids: [50258, 50288, 50359, 50363, 639, 198, 198, 271, 198, 64, 198, 31636, 13, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> This\n\nis\na\ntest.<|endoftext|>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u0120UN", "w", "ant", "\u00c3\u00a9d", ",", "running"],
+      ids: [50258, 50288, 50359, 50363, 8229, 86, 394, 7811, 11, 45482, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> UNwant\u00e9d,running<|endoftext|>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["\u01201", "\u0100", "2", "\u00ef\u00bf\u00bd", "3"],
+      ids: [50258, 50288, 50359, 50363, 502, 188, 17, 5342, 18, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> 1\u00002\ufffd3<|endoftext|>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["\u0120Hello", "\u0120World"],
+      ids: [50258, 50288, 50359, 50363, 2425, 3937, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> Hello World<|endoftext|>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["\u0120hello", "\u0120world"],
+      ids: [50258, 50288, 50359, 50363, 7751, 1002, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> hello world<|endoftext|>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u0120", "\u00e7\u0136\u0141\u00e6\u00b4\u00bb", "\u00e7\u013c\u0126", "\u00e7\u013e\u0141", "\u00e8\u00b0", "\u013d", "\u00e6\u013a\u00af"],
+      ids: [50258, 50288, 50359, 50363, 220, 49958, 1546, 6303, 8897, 249, 1541, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> \u751f\u6d3b\u7684\u771f\u8c1b\u662f<|endoftext|>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u0120", "\u0120", "\u0120leading", "\u0120space"],
+      ids: [50258, 50288, 50359, 50363, 220, 220, 5775, 1901, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|>   leading space<|endoftext|>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u0120", "tra", "iling", "\u0120space", "\u0120", "\u0120", "\u0120"],
+      ids: [50258, 50288, 50359, 50363, 220, 17227, 4883, 1901, 220, 220, 220, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> trailing space   <|endoftext|>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["\u0120Hi", "\u0120", "\u0120Hello"],
+      ids: [50258, 50288, 50359, 50363, 2421, 220, 2425, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> Hi  Hello<|endoftext|>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u0120", "test", "\u0120$", "1", "\u0120R", "2", "\u0120#", "3", "\u0120\u00e2\u0124\u00ac", "4", "\u0120\u00c2\u00a3", "5", "\u0120\u00c2", "\u00a5", "6", "\u0120\u00e2", "\u0124", "\u00a3", "7", "\u0120\u00e2", "\u0124", "\u00b9", "8", "\u0120\u00e2", "\u0124", "\u00b1", "9", "\u0120", "test"],
+      ids: [50258, 50288, 50359, 50363, 220, 31636, 1848, 16, 497, 17, 3536, 18, 17450, 19, 14378, 20, 1815, 98, 21, 672, 224, 96, 22, 672, 224, 117, 23, 672, 224, 109, 24, 220, 31636, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> test $1 R2 #3 \u20ac4 \u00a35 \u00a56 \u20a37 \u20b98 \u20b19 test<|endoftext|>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u0120I", "\u0120bought", "\u0120an", "\u0120apple", "\u0120for", "\u0120$", "1", ".", "00", "\u0120at", "\u0120", "the", "\u0120store", "."],
+      ids: [50258, 50288, 50359, 50363, 286, 4243, 364, 10606, 337, 1848, 16, 13, 628, 412, 220, 3322, 3531, 13, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> I bought an apple for $1.00 at the store.<|endoftext|>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u0120you", "\u00e2\u0122\u00a6", "\u0120", "\u0120"],
+      ids: [50258, 50288, 50359, 50363, 291, 1260, 220, 220, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> you\u2026  <|endoftext|>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u0120you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142"],
+      ids: [50258, 50288, 50359, 50363, 291, 1260, 126, 254, 126, 254, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> you\u2026\u00a0\u00a0<|endoftext|>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u0120you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142", "you", "\u00e2\u0122\u00a6", "\u00c2", "\u0142", "\u00c2", "\u0142"],
+      ids: [50258, 50288, 50359, 50363, 291, 1260, 126, 254, 126, 254, 5616, 1260, 126, 254, 126, 254, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> you\u2026\u00a0\u00a0you\u2026\u00a0\u00a0<|endoftext|>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u0120weird", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120edge", "\u0120\u00ef", "\u00bd", "\u0140", "\u0120case"],
+      ids: [50258, 50288, 50359, 50363, 3657, 25072, 121, 252, 4691, 25072, 121, 252, 1389, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> weird \uff5e edge \uff5e case<|endoftext|>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u0120\u00e2\u0138", "\u0123", "This", "\u0120\u00e2\u0138", "\u0123", "is", "\u0120\u00e2\u0138", "\u0123", "a", "\u0120\u00e2\u0138", "\u0123", "test", "\u0120\u00e2\u0138", "\u0123", "."],
+      ids: [50258, 50288, 50359, 50363, 29405, 223, 5723, 29405, 223, 271, 29405, 223, 64, 29405, 223, 31636, 29405, 223, 13, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|> \u2581This \u2581is \u2581a \u2581test \u2581.<|endoftext|>",
+    },
+    SPECIAL_TOKENS: {
+      text: WHISPER_TEST_STRINGS.SPECIAL_TOKENS,
+      tokens: ["\u0120", "\u0120", "\u0120", "<|startoftranscript|>", "\u0120", "<|en|>", "\u0120", "\u0120", "\u0120"],
+      ids: [50258, 50288, 50359, 50363, 220, 220, 220, 50258, 220, 50259, 220, 220, 220, 50257],
+      decoded: "<|startoftranscript|><|no|><|transcribe|><|notimestamps|>   <|startoftranscript|> <|en|>   <|endoftext|>",
+    },
+  },
+};
+
+const MAX_EXECUTION_TIME = 10_000;
+export const CUSTOM_TESTS = () => {
+  describe("Decode ASR", () => {
+    it(
+      "should decode ASR outputs",
+      async () => {
+        const tokenizer = await WhisperTokenizer.from_pretrained("onnx-community/whisper-tiny.en_timestamped");
+
+        const model_outputs = [
+          {
+            stride: [30, 0, 5],
+            tokens: [50257n, 50362n, 8410n, 7283n, 0n, 2329n, 8410n, 7283n, 0n, 2094n, 470n, 1309n, 534n, 10625n, 307n, 10625n, 13n, 34668n, 345n, 531n, 9439n, 11n, 523n, 655n, 8410n, 7283n, 0n, 39134n, 16592n, 10625n, 0n, 9440n, 36n, 26751n, 0n, 25848n, 8410n, 7283n, 0n, 2773n, 661n, 4320n, 1943n, 981n, 345n, 821n, 8066n, 7765n, 510n, 290n, 670n, 1327n, 379n, 340n, 13n, 10528n, 318n, 5340n, 0n, 50256n],
+            token_timestamps: [0, 0, 0, 3.78, 4.22, 5.34, 6.04, 6.56, 7, 7.92, 8.58, 8.58, 8.88, 9.14, 9.54, 9.94, 10.58, 11.38, 11.88, 12.42, 12.62, 13, 13.36, 13.64, 14.26, 14.76, 15.12, 15.4, 15.74, 16.12, 16.66, 17.14, 17.24, 17.24, 17.72, 18.38, 18.6, 19.38, 19.92, 22.66, 22.9, 23.24, 23.5, 24.14, 24.56, 24.7, 24.72, 24.94, 25.18, 25.54, 25.72, 26.02, 26.34, 26.44, 26.84, 27.04, 27.16, 27.54, 28.06, 29.92],
+          },
+          {
+            stride: [30, 5, 5],
+            tokens: [50257n, 50362n, 2773n, 661n, 4320n, 1943n, 981n, 345n, 821n, 8066n, 7765n, 510n, 290n, 670n, 1327n, 379n, 340n, 13n, 10528n, 318n, 5340n, 13n, 921n, 815n, 651n, 284n, 262n, 966n, 810n, 2687n, 2073n, 561n, 11238n, 290n, 345n, 821n, 407n, 8066n, 2245n, 612n, 13n, 1400n, 11n, 644n, 389n, 345n, 4953n, 329n, 30n, 2141n, 340n, 0n, 2329n, 466n, 340n, 0n, 3363n, 345n, 460n, 0n, 2329n, 466n, 340n, 0n, 50256n],
+            token_timestamps: [0, 0, 0, 2.92, 3.24, 3.48, 4.14, 4.56, 4.7, 4.74, 4.92, 5.18, 5.54, 5.72, 6.04, 6.34, 6.46, 6.84, 7.04, 7.16, 7.54, 8.12, 10.16, 10.7, 10.9, 11.12, 11.24, 11.48, 11.84, 12.44, 12.82, 13.2, 13.46, 13.72, 14.06, 14.28, 14.34, 14.56, 14.8, 15.16, 15.9, 16.42, 16.82, 16.86, 17.02, 17.1, 17.22, 17.56, 18.06, 19.28, 19.62, 20.26, 21.96, 22.64, 24.28, 24.76, 25.18, 25.56, 25.78, 26.28, 27.12, 27.54, 27.82, 28.22, 29.48],
+          },
+          {
+            stride: [23.7728125, 5, 0],
+            tokens: [50257n, 50362n, 2329n, 466n, 340n, 0n, 3363n, 345n, 460n, 0n, 2329n, 466n, 340n, 0n, 1002n, 345n, 821n, 10032n, 286n, 3599n, 625n, 11n, 2245n, 3501n, 510n, 13n, 50256n],
+            token_timestamps: [0, 0, 0, 2.44, 4.3, 5.04, 5.06, 5.56, 5.8, 6.32, 7.12, 7.56, 7.8, 8.9, 10.92, 12.96, 13.28, 13.28, 13.44, 13.72, 13.96, 14.84, 15.5, 16.06, 16.86, 17.88, 20.92],
+          },
+        ];
+
+        const target = [
+          " DO IT! Just DO IT! Don't let your dreams be dreams. Yesterday you said tomorrow, so just DO IT! MAKE YOUR dreams! COME TRUE! JUST DO IT! Some people dream success while you're gonna wake up and work hard at it. Nothing is impossible. You should get to the point where anyone else would quit and you're not gonna stop there. No, what are you waiting for? Do it! Just do it! Yes you can! Just do it! If you're tired of starting over, stop giving up.",
+          {
+            chunks: [
+              { text: " DO", timestamp: [0.0, 3.78] },
+              { text: " IT!", timestamp: [3.78, 4.24 /* 5.34 */] },
+              { text: " Just", timestamp: [5.34, 6.04] },
+              { text: " DO", timestamp: [6.04, 6.56] },
+              { text: " IT!", timestamp: [6.56, 7.02 /* 7.92 */] },
+              { text: " Don't", timestamp: [7.92, 8.58] },
+              { text: " let", timestamp: [8.58, 8.88] },
+              { text: " your", timestamp: [8.88, 9.14] },
+              { text: " dreams", timestamp: [9.14, 9.54] },
+              { text: " be", timestamp: [9.54, 9.94] },
+              { text: " dreams.", timestamp: [9.94, 10.6 /* 11.38 */] },
+              { text: " Yesterday", timestamp: [11.38, 11.88] },
+              { text: " you", timestamp: [11.88, 12.42] },
+              { text: " said", timestamp: [12.42, 12.62] },
+              { text: " tomorrow,", timestamp: [12.62, 13.02 /* 13.36 */] },
+              { text: " so", timestamp: [13.36, 13.64] },
+              { text: " just", timestamp: [13.64, 14.26] },
+              { text: " DO", timestamp: [14.26, 14.76] },
+              { text: " IT!", timestamp: [14.76, 15.14 /* 15.4 */] },
+              { text: " MAKE", timestamp: [15.4, 15.74] },
+              { text: " YOUR", timestamp: [15.74, 16.12] },
+              { text: " dreams!", timestamp: [16.12, 16.68 /* 17.14 */] },
+              { text: " COME", timestamp: [17.14, 17.24] },
+              { text: " TRUE!", timestamp: [17.24, 17.74 /* 18.38 */] },
+              { text: " JUST", timestamp: [18.38, 18.6] },
+              { text: " DO", timestamp: [18.6, 19.38] },
+              { text: " IT!", timestamp: [19.38, 19.94 /* 22.66 */] },
+              { text: " Some", timestamp: [22.66, 22.9] },
+              { text: " people", timestamp: [22.9, 23.24] },
+              { text: " dream", timestamp: [23.24, 23.5] },
+              { text: " success", timestamp: [23.5, 24.14] },
+              { text: " while", timestamp: [24.14, 24.56] },
+              { text: " you're", timestamp: [24.56, 24.72] },
+              { text: " gonna", timestamp: [24.72, 24.94] },
+              { text: " wake", timestamp: [24.94, 25.18] },
+              { text: " up", timestamp: [25.18, 25.54] },
+              { text: " and", timestamp: [25.54, 25.72] },
+              { text: " work", timestamp: [25.72, 26.04] },
+              { text: " hard", timestamp: [26.04, 26.34] },
+              { text: " at", timestamp: [26.34, 26.46] },
+              { text: " it.", timestamp: [26.46, 26.86 /* 27.04 */] },
+              { text: " Nothing", timestamp: [27.04, 27.16] },
+              { text: " is", timestamp: [27.16, 27.54] },
+              { text: " impossible.", timestamp: [27.54, 28.14 /* 30.16 */] },
+              { text: " You", timestamp: [30.16, 30.7] },
+              { text: " should", timestamp: [30.7, 30.9] },
+              { text: " get", timestamp: [30.9, 31.12] },
+              { text: " to", timestamp: [31.12, 31.24] },
+              { text: " the", timestamp: [31.24, 31.48] },
+              { text: " point", timestamp: [31.48, 31.84] },
+              { text: " where", timestamp: [31.84, 32.44] },
+              { text: " anyone", timestamp: [32.44, 32.82] },
+              { text: " else", timestamp: [32.82, 33.2] },
+              { text: " would", timestamp: [33.2, 33.46] },
+              { text: " quit", timestamp: [33.46, 33.72] },
+              { text: " and", timestamp: [33.72, 34.06] },
+              { text: " you're", timestamp: [34.06, 34.34] },
+              { text: " not", timestamp: [34.34, 34.56] },
+              { text: " gonna", timestamp: [34.56, 34.8] },
+              { text: " stop", timestamp: [34.8, 35.16] },
+              { text: " there.", timestamp: [35.16, 35.92 /* 36.42 */] },
+              { text: " No,", timestamp: [36.42, 36.84 /* 36.86 */] },
+              { text: " what", timestamp: [36.86, 37.02] },
+              { text: " are", timestamp: [37.02, 37.1] },
+              { text: " you", timestamp: [37.1, 37.22] },
+              { text: " waiting", timestamp: [37.22, 37.56] },
+              { text: " for?", timestamp: [37.56, 38.08 /* 39.28 */] },
+              { text: " Do", timestamp: [39.28, 39.62] },
+              { text: " it!", timestamp: [39.62, 40.28 /* 41.96 */] },
+              { text: " Just", timestamp: [41.96, 42.64] },
+              { text: " do", timestamp: [42.64, 44.28] },
+              { text: " it!", timestamp: [44.28, 44.78 /* 45.18 */] },
+              { text: " Yes", timestamp: [45.18, 45.56] },
+              { text: " you", timestamp: [45.56, 45.78] },
+              { text: " can!", timestamp: [45.8, 46.34 /* 47.12 */] },
+              { text: " Just", timestamp: [47.12, 47.56] },
+              { text: " do", timestamp: [47.56, 47.8] },
+              { text: " it!", timestamp: [47.8, 48.92 /* 50.92 */] },
+              { text: " If", timestamp: [50.92, 52.96] },
+              { text: " you're", timestamp: [52.96, 53.28] },
+              { text: " tired", timestamp: [53.28, 53.44] },
+              { text: " of", timestamp: [53.44, 53.72] },
+              { text: " starting", timestamp: [53.72, 53.96] },
+              { text: " over,", timestamp: [53.96, 54.86 /* 55.5 */] },
+              { text: " stop", timestamp: [55.5, 56.06] },
+              { text: " giving", timestamp: [56.06, 56.86] },
+              { text: " up.", timestamp: [56.86, 57.9 /* 60.92 */] },
+            ],
+          },
+        ];
+
+        compare(
+          tokenizer._decode_asr(model_outputs, {
+            return_timestamps: "word",
+            time_precision: 0.02,
+            force_full_sequences: false,
+          }),
+          target,
+          1e-2,
+        );
+      },
+      MAX_EXECUTION_TIME,
+    );
+
+    it(
+      "should handle overlapping edge case",
+      async () => {
+        const tokenizer = await WhisperTokenizer.from_pretrained("onnx-community/whisper-tiny.en_timestamped");
+
+        const model_outputs = [
+          {
+            stride: [30, 0, 5],
+            tokens: [50257n, 50362n, 8410n, 7283n, 0n, 2329n, 8410n, 7283n, 0n, 2094n, 470n, 1309n, 534n, 10625n, 307n, 10625n, 13n, 34668n, 11n, 345n, 531n, 9439n, 11n, 523n, 655n, 8410n, 7283n, 0n, 39134n, 16592n, 10560n, 3955n, 50n, 0n, 7102n, 5446n, 46n, 0n, 25848n, 8410n, 7283n, 0n, 2773n, 661n, 4320n, 1943n, 981n, 345n, 821n, 8066n, 7765n, 510n, 290n, 670n, 1327n, 379n, 340n, 13n, 10528n, 318n, 5340n, 13n, 50256n],
+            token_timestamps: [0, 0, 0, 3.78, 4.22, 5.26, 6.04, 6.54, 7, 7.94, 8.58, 8.58, 8.88, 9.16, 9.54, 9.94, 10.6, 11.38, 11.88, 12.38, 12.44, 12.62, 13, 13.36, 13.64, 14.24, 14.74, 15.12, 15.4, 15.74, 16.1, 16.54, 16.54, 16.78, 17.08, 17.2, 17.36, 17.56, 18.08, 18.58, 19.38, 19.88, 22.54, 22.9, 23.24, 23.5, 24.14, 24.56, 24.7, 24.94, 24.94, 25.18, 25.54, 25.72, 26.04, 26.34, 26.46, 26.84, 27.04, 27.14, 27.54, 28.06, 29.92],
+          },
+          {
+            stride: [30, 5, 5],
+            tokens: [50257n, 50362n, 2773n, 661n, 4320n, 1943n, 981n, 345n, 821n, 8066n, 7765n, 510n, 290n, 670n, 1327n, 379n, 340n, 13n, 10528n, 318n, 5340n, 13n, 921n, 815n, 651n, 284n, 262n, 966n, 810n, 2687n, 2073n, 561n, 11238n, 290n, 345n, 821n, 407n, 8066n, 2245n, 612n, 13n, 1400n, 11n, 644n, 389n, 345n, 4953n, 329n, 30n, 2141n, 340n, 0n, 2329n, 466n, 340n, 0n, 3363n, 11n, 345n, 460n, 0n, 2329n, 466n, 340n, 0n, 50256n],
+            token_timestamps: [0, 0, 0, 2.92, 3.24, 3.5, 4.14, 4.56, 4.7, 4.74, 4.92, 5.18, 5.54, 5.74, 6.04, 6.34, 6.46, 6.84, 7.04, 7.18, 7.56, 8.12, 9.68, 10.7, 10.88, 11.1, 11.24, 11.48, 11.82, 12.46, 12.82, 13.2, 13.46, 13.72, 14.08, 14.28, 14.34, 14.56, 14.82, 15.16, 15.72, 16.42, 16.82, 16.86, 17, 17.1, 17.2, 17.56, 18.06, 19.28, 19.6, 20.28, 21.96, 22.64, 24.28, 24.76, 25.18, 25.56, 25.56, 25.84, 26.36, 27.12, 27.54, 27.82, 28.16, 29.48],
+          },
+          {
+            stride: [23.7728125, 5, 0],
+            tokens: [50257n, 50362n, 2329n, 466n, 340n, 0n, 3363n, 345n, 460n, 0n, 2329n, 466n, 340n, 0n, 1002n, 534n, 15867n, 318n, 3599n, 625n, 11n, 2245n, 3501n, 510n, 13n, 50256n],
+            token_timestamps: [0, 0, 0, 2.44, 4.3, 5.04, 5.06, 5.56, 5.8, 6.32, 7.12, 7.56, 7.8, 8.72, 10.04, 12.96, 13.3, 13.44, 13.72, 13.98, 14.86, 15.5, 16, 16.88, 17.76, 20.9],
+          },
+        ];
+
+        const target = [
+          " DO IT! Just DO IT! Don't let your dreams be dreams. Yesterday, you said tomorrow, so just DO IT! MAKE YOUR DRIMS! CONTRO! JUST DO IT! Some people dream success while you're gonna wake up and work hard at it. Nothing is impossible. You should get to the point where anyone else would quit and you're not gonna stop there. No, what are you waiting for? Do it! Just do it! Yes, you can! Just do it! If your tire is starting over, stop giving up.",
+          {
+            chunks: [
+              { text: " DO", timestamp: [0, 3.78] },
+              { text: " IT!", timestamp: [3.78, 4.24] },
+              { text: " Just", timestamp: [5.26, 6.04] },
+              { text: " DO", timestamp: [6.04, 6.54] },
+              { text: " IT!", timestamp: [6.54, 7.02] },
+              { text: " Don't", timestamp: [7.94, 8.58] },
+              { text: " let", timestamp: [8.58, 8.88] },
+              { text: " your", timestamp: [8.88, 9.16] },
+              { text: " dreams", timestamp: [9.16, 9.54] },
+              { text: " be", timestamp: [9.54, 9.94] },
+              { text: " dreams.", timestamp: [9.94, 10.62] },
+              { text: " Yesterday,", timestamp: [11.38, 11.9] },
+              { text: " you", timestamp: [12.38, 12.44] },
+              { text: " said", timestamp: [12.44, 12.62] },
+              { text: " tomorrow,", timestamp: [12.62, 13.02] },
+              { text: " so", timestamp: [13.36, 13.64] },
+              { text: " just", timestamp: [13.64, 14.24] },
+              { text: " DO", timestamp: [14.24, 14.74] },
+              { text: " IT!", timestamp: [14.74, 15.14] },
+              { text: " MAKE", timestamp: [15.4, 15.74] },
+              { text: " YOUR", timestamp: [15.74, 16.1] },
+              { text: " DRIMS!", timestamp: [16.1, 16.8] },
+              { text: " CONTRO!", timestamp: [17.08, 17.58] },
+              { text: " JUST", timestamp: [18.08, 18.58] },
+              { text: " DO", timestamp: [18.58, 19.38] },
+              { text: " IT!", timestamp: [19.38, 19.9] },
+              { text: " Some", timestamp: [22.54, 22.9] },
+              { text: " people", timestamp: [22.9, 23.24] },
+              { text: " dream", timestamp: [23.24, 23.5] },
+              { text: " success", timestamp: [23.5, 24.14] },
+              { text: " while", timestamp: [24.14, 24.56] },
+              { text: " you're", timestamp: [24.56, 24.94] },
+              { text: " gonna", timestamp: [24.94, 24.94] },
+              { text: " wake", timestamp: [24.94, 25.18] },
+              { text: " up", timestamp: [25.18, 25.54] },
+              { text: " and", timestamp: [25.54, 25.74] },
+              { text: " work", timestamp: [25.74, 26.04] },
+              { text: " hard", timestamp: [26.04, 26.34] },
+              { text: " at", timestamp: [26.34, 26.46] },
+              { text: " it.", timestamp: [26.46, 26.86] },
+              { text: " Nothing", timestamp: [27.04, 27.18] },
+              { text: " is", timestamp: [27.18, 27.56] },
+              { text: " impossible.", timestamp: [27.56, 28.14] },
+              { text: " You", timestamp: [29.68, 30.7] },
+              { text: " should", timestamp: [30.7, 30.88] },
+              { text: " get", timestamp: [30.88, 31.1] },
+              { text: " to", timestamp: [31.1, 31.24] },
+              { text: " the", timestamp: [31.24, 31.48] },
+              { text: " point", timestamp: [31.48, 31.82] },
+              { text: " where", timestamp: [31.82, 32.46] },
+              { text: " anyone", timestamp: [32.46, 32.82] },
+              { text: " else", timestamp: [32.82, 33.2] },
+              { text: " would", timestamp: [33.2, 33.46] },
+              { text: " quit", timestamp: [33.46, 33.72] },
+              { text: " and", timestamp: [33.72, 34.08] },
+              { text: " you're", timestamp: [34.08, 34.34] },
+              { text: " not", timestamp: [34.34, 34.56] },
+              { text: " gonna", timestamp: [34.56, 34.82] },
+              { text: " stop", timestamp: [34.82, 35.16] },
+              { text: " there.", timestamp: [35.16, 35.74] },
+              { text: " No,", timestamp: [36.42, 36.84] },
+              { text: " what", timestamp: [36.86, 37] },
+              { text: " are", timestamp: [37, 37.1] },
+              { text: " you", timestamp: [37.1, 37.2] },
+              { text: " waiting", timestamp: [37.2, 37.56] },
+              { text: " for?", timestamp: [37.56, 38.08] },
+              { text: " Do", timestamp: [39.28, 39.6] },
+              { text: " it!", timestamp: [39.6, 40.3] },
+              { text: " Just", timestamp: [41.96, 42.64] },
+              { text: " do", timestamp: [42.64, 44.28] },
+              { text: " it!", timestamp: [44.28, 44.78] },
+              { text: " Yes,", timestamp: [45.18, 45.56] },
+              { text: " you", timestamp: [45.56, 45.84] },
+              { text: " can!", timestamp: [45.8, 46.34] },
+              { text: " Just", timestamp: [47.12, 47.56] },
+              { text: " do", timestamp: [47.56, 47.8] },
+              { text: " it!", timestamp: [47.8, 48.74] },
+              { text: " If", timestamp: [50.04, 52.96] },
+              { text: " your", timestamp: [52.96, 53.3] },
+              { text: " tire", timestamp: [53.3, 53.44] },
+              { text: " is", timestamp: [53.44, 53.72] },
+              { text: " starting", timestamp: [53.72, 53.98] },
+              { text: " over,", timestamp: [53.98, 54.88] },
+              { text: " stop", timestamp: [55.5, 56] },
+              { text: " giving", timestamp: [56, 56.88] },
+              { text: " up.", timestamp: [56.88, 57.78] },
+            ],
+          },
+        ];
+
+        compare(
+          tokenizer._decode_asr(model_outputs, {
+            return_timestamps: "word",
+            time_precision: 0.02,
+            force_full_sequences: false,
+          }),
+          target,
+          1e-2,
+        );
+      },
+      MAX_EXECUTION_TIME,
+    );
+  });
+};
diff --git a/tests/models/xlm-roberta/tokenization.js b/tests/models/xlm-roberta/tokenization.js
new file mode 100644
index 000000000..67d02ce8d
--- /dev/null
+++ b/tests/models/xlm-roberta/tokenization.js
@@ -0,0 +1,332 @@
+import { XLMRobertaTokenizer } from "../../../src/tokenizers.js";
+import { BASE_TEST_STRINGS } from "../test_strings.js";
+
+export const TOKENIZER_CLASS = XLMRobertaTokenizer;
+export const TEST_CONFIG = {
+  "Xenova/bge-reranker-base": {
+    SIMPLE: {
+      text: BASE_TEST_STRINGS.SIMPLE,
+      tokens: ["\u2581How", "\u2581are", "\u2581you", "\u2581doing", "?"],
+      ids: [0, 11249, 621, 398, 20594, 32, 2],
+      decoded: "<s> How are you doing?</s>",
+    },
+    SIMPLE_WITH_PUNCTUATION: {
+      text: BASE_TEST_STRINGS.SIMPLE_WITH_PUNCTUATION,
+      tokens: ["\u2581You", "\u2581should", "'", "ve", "\u2581done", "\u2581this"],
+      ids: [0, 2583, 5608, 25, 272, 16940, 903, 2],
+      decoded: "<s> You should've done this</s>",
+    },
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["\u258101", "23", "456", "789", "\u25810", "\u25811", "\u25812", "\u25813", "\u25814", "\u25815", "\u25816", "\u25817", "\u25818", "\u25819", "\u258110", "\u2581100", "\u25811000"],
+      ids: [0, 3413, 3742, 121317, 153781, 757, 106, 116, 138, 201, 190, 305, 361, 382, 483, 209, 805, 4382, 2],
+      decoded: "<s> 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000</s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u2581The", "\u2581company", "\u2581was", "\u2581found", "ed", "\u2581in", "\u25812016."],
+      ids: [0, 581, 14380, 509, 14037, 297, 23, 6360, 2],
+      decoded: "<s> The company was founded in 2016.</s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581A", "\u2581'", "ll", "\u2581!!", "to", "?", "'", "d", "''", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [0, 62, 242, 1181, 6506, 188, 32, 25, 71, 4765, 71, 111, 4, 831, 25, 18, 5, 2],
+      decoded: "<s> A 'll!!to?'d''d of, can't.</s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u2581de", "f", "\u2581main", "(", "):", "\u2581pass"],
+      ids: [0, 8, 420, 5201, 132, 2077, 27875, 2],
+      decoded: "<s> def main(): pass</s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u2581let", "\u2581a", "\u2581=", "\u2581ob", "j", ".", "to", "Str", "ing", "(", ");", "\u2581to", "Str", "ing", "(", ");"],
+      ids: [0, 2633, 10, 2203, 995, 170, 5, 188, 71713, 214, 132, 3142, 47, 71713, 214, 132, 3142, 2],
+      decoded: "<s> let a = obj.toString(); toString();</s>",
+    },
+    NEWLINES: {
+      text: BASE_TEST_STRINGS.NEWLINES,
+      tokens: ["\u2581This", "\u2581is", "\u2581a", "\u2581test", "."],
+      ids: [0, 3293, 83, 10, 3034, 5, 2],
+      decoded: "<s> This is a test.</s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u2581UN", "wan", "t\u00e9", "d", ",", "run", "ning"],
+      ids: [0, 8274, 3206, 2312, 71, 4, 16428, 592, 2],
+      decoded: "<s> UNwant\u00e9d,running</s>",
+    },
+    CONTROL_TOKENS: {
+      text: BASE_TEST_STRINGS.CONTROL_TOKENS,
+      tokens: ["\u25811", "\u0000", "2", "\u25813"],
+      ids: [0, 106, 3, 304, 138, 2],
+      decoded: "<s> 1<unk>2 3</s>",
+    },
+    HELLO_WORLD_TITLECASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_TITLECASE,
+      tokens: ["\u2581Hello", "\u2581World"],
+      ids: [0, 35378, 6661, 2],
+      decoded: "<s> Hello World</s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["\u2581hell", "o", "\u2581world"],
+      ids: [0, 33600, 31, 8999, 2],
+      decoded: "<s> hello world</s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581", "\u751f\u6d3b\u7684", "\u771f", "\u8c1b", "\u662f"],
+      ids: [0, 6, 62668, 5364, 245875, 354, 2],
+      decoded: "<s> \u751f\u6d3b\u7684\u771f\u8c1b\u662f</s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581leading", "\u2581space"],
+      ids: [0, 105207, 32628, 2],
+      decoded: "<s> leading space</s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trail", "ing", "\u2581space", "\u2581"],
+      ids: [0, 141037, 214, 32628, 6, 2],
+      decoded: "<s> trailing space </s>",
+    },
+    DOUBLE_SPACE: {
+      text: BASE_TEST_STRINGS.DOUBLE_SPACE,
+      tokens: ["\u2581Hi", "\u2581Hello"],
+      ids: [0, 2673, 35378, 2],
+      decoded: "<s> Hi Hello</s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$1", "\u2581R", "2", "\u2581#3", "\u2581\u20ac", "4", "\u2581\u00a3", "5", "\u2581", "\u00a5", "6", "\u2581", "\u20a3", "7", "\u2581\u20b9", "8", "\u2581", "\u20b1", "9", "\u2581test"],
+      ids: [0, 3034, 38629, 627, 304, 111378, 2505, 617, 11762, 758, 6, 32389, 910, 6, 3, 966, 87316, 1019, 6, 247425, 1126, 3034, 2],
+      decoded: "<s> test $1 R2 #3 \u20ac4 \u00a35 \u00a56 <unk>7 \u20b98 \u20b19 test</s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581I", "\u2581bought", "\u2581an", "\u2581apple", "\u2581for", "\u2581$", "1.00", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [0, 87, 123997, 142, 108787, 100, 3650, 146533, 99, 70, 4343, 5, 2],
+      decoded: "<s> I bought an apple for $1.00 at the store.</s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "...", "\u2581"],
+      ids: [0, 398, 27, 6, 2],
+      decoded: "<s> you... </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "...", "\u2581"],
+      ids: [0, 398, 27, 6, 2],
+      decoded: "<s> you... </s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "...", "\u2581you", "...", "\u2581"],
+      ids: [0, 398, 27, 398, 27, 6, 2],
+      decoded: "<s> you... you... </s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581weird", "\u2581", "\uff5e", "\u2581edge", "\u2581", "\uff5e", "\u2581case"],
+      ids: [0, 179459, 6, 6087, 121303, 6, 6087, 7225, 2],
+      decoded: "<s> weird \uff5e edge \uff5e case</s>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581This", "\u2581is", "\u2581a", "\u2581test", "\u2581", "."],
+      ids: [0, 3293, 83, 10, 3034, 6, 5, 2],
+      decoded: "<s> This is a test.</s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581", "\ud83d\ude02", "\u2581", "\ud83d\udc4d", "\u2581", "\ud83e\udd23", "\u2581", "\ud83d\ude0d", "\u2581", "\ud83d\ude2d", "\u2581", "\ud83c\udf89", "\u2581", "\ud83d\ude4f", "\u2581", "\ud83d\ude0a", "\u2581", "\ud83d\udd25", "\u2581", "\ud83d\ude01", "\u2581", "\ud83d\ude05", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\ude06", "\u2581", "\ud83d\udc4f", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc9c", "\u2581", "\ud83d\udc9a", "\u2581", "\ud83d\udc97", "\u2581", "\ud83d\udc99", "\u2581", "\ud83d\udda4", "\u2581", "\ud83d\ude0e", "\u2581", "\ud83d\udc4c", "\u2581", "\ud83e\udd73", "\u2581", "\ud83d\udcaa", "\u2581", "\u2728", "\u2581", "\ud83d\udc49", "\u2581", "\ud83d\udc40", "\u2581", "\ud83d\udcaf", "\u2581", "\ud83c\udf88", "\u2581", "\ud83d\ude48", "\u2581", "\ud83d\ude4c", "\u2581", "\ud83d\udc80", "\u2581", "\ud83d\udc47", "\u2581", "\ud83d\udc4b", "\u2581", "\u2705", "\u2581", "\ud83c\udf81", "\u2581", "\ud83c\udf1e", "\u2581", "\ud83c\udf38", "\u2581", "\ud83d\udcb0"],
+      ids: [0, 6, 115114, 6, 118280, 6, 243385, 6, 84464, 6, 232773, 6, 243816, 6, 113612, 6, 82803, 6, 222326, 6, 201344, 6, 239569, 6, 243544, 6, 191876, 6, 243404, 49933, 15755, 6, 244233, 6, 244162, 6, 244181, 6, 243892, 6, 245820, 6, 161546, 6, 204811, 6, 3, 6, 238992, 6, 167474, 6, 120242, 6, 245561, 6, 244864, 6, 246144, 6, 244459, 6, 244703, 6, 246887, 6, 144400, 6, 246511, 6, 142325, 6, 244230, 6, 245559, 6, 243374, 6, 245200, 2],
+      decoded: "<s> \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 \ud83d\ude05 \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c <unk> \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf \ud83c\udf88 \ud83d\ude48 \ud83d\ude4c \ud83d\udc80 \ud83d\udc47 \ud83d\udc4b \u2705 \ud83c\udf81 \ud83c\udf1e \ud83c\udf38 \ud83d\udcb0</s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "\u2728", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\udc41", "\ufe0f", "\u2581", "\ud83d\udc71", "\ud83c\udffb", "\u2581", "\ud83d\udd75", "\u2581", "\u2642", "\ufe0f", "\u2581", "\ud83e\uddd9", "\ud83c\udffb", "\u2581", "\u2642", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581", "\ud83c\udf3e", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83d\udc69", "\u2581\u2764", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc67", "\u2581", "\ud83d\udc66", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\ud83c\udffc"],
+      ids: [0, 6, 167474, 6, 243544, 6, 246984, 15755, 6, 247201, 79500, 6, 248325, 6, 228250, 15755, 6, 3, 79500, 6, 228250, 6, 244314, 79500, 6, 246529, 6, 3, 6, 247443, 6, 3, 6, 244785, 49933, 6, 244960, 6, 244314, 6, 244785, 6, 244785, 6, 245719, 6, 246167, 6, 3, 79500, 6, 247443, 6, 3, 79500, 6, 3, 6, 244314, 79500, 49933, 15755, 6, 244960, 6, 244314, 239719, 2],
+      decoded: "<s> \u2728 \ud83e\udd17 \ud83d\udc41\ufe0f \ud83d\udc71\ud83c\udffb \ud83d\udd75 \u2642\ufe0f <unk>\ud83c\udffb \u2642 \ud83d\udc68\ud83c\udffb \ud83c\udf3e <unk> \ud83e\udd1d <unk> \ud83d\udc69 \u2764 \ud83d\udc8b \ud83d\udc68 \ud83d\udc69 \ud83d\udc69 \ud83d\udc67 \ud83d\udc66 <unk>\ud83c\udffb \ud83e\udd1d <unk>\ud83c\udffb <unk> \ud83d\udc68\ud83c\udffb \u2764\ufe0f \ud83d\udc8b \ud83d\udc68\ud83c\udffc</s>",
+    },
+  },
+  "Xenova/paraphrase-multilingual-mpnet-base-v2": {
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trail", "ing", "\u2581space"],
+      ids: [0, 141037, 214, 32628, 2],
+      decoded: "<s> trailing space</s>",
+    },
+    ELLIPSIS: {
+      text: BASE_TEST_STRINGS.ELLIPSIS,
+      tokens: ["\u2581you", "..."],
+      ids: [0, 398, 27, 2],
+      decoded: "<s> you...</s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS,
+      tokens: ["\u2581you", "..."],
+      ids: [0, 398, 27, 2],
+      decoded: "<s> you...</s>",
+    },
+    TEXT_WITH_ESCAPE_CHARACTERS_2: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_ESCAPE_CHARACTERS_2,
+      tokens: ["\u2581you", "...", "\u2581you", "..."],
+      ids: [0, 398, 27, 398, 27, 2],
+      decoded: "<s> you... you...</s>",
+    },
+  },
+  "Xenova/donut-base-finetuned-docvqa": {
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["\u258101", "23", "45", "67", "89", "\u25810", "\u25811", "\u25812", "\u25813", "\u25814", "\u25815", "\u25816", "\u25817", "\u25818", "\u25819", "\u258110", "\u2581100", "\u25811000"],
+      ids: [0, 37391, 38611, 41742, 18610, 20121, 50891, 1314, 3822, 9066, 22081, 20017, 35977, 38100, 38873, 42378, 23485, 52285, 40881, 2],
+      decoded: "<s> 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000</s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581A", "\u2581'", "ll", "\u2581!!", "to", "?", "'", "d", "'", "'", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [0, 46518, 28559, 4558, 47751, 19616, 36209, 28431, 49224, 28431, 28431, 49224, 2587, 35815, 53017, 28431, 16191, 39539, 2],
+      decoded: "<s> A 'll!!to?'d''d of, can't.</s>",
+    },
+    LEADING_SPACE: {
+      text: BASE_TEST_STRINGS.LEADING_SPACE,
+      tokens: ["\u2581lead", "ing", "\u2581space"],
+      ids: [0, 38498, 24357, 36833, 2],
+      decoded: "<s> leading space</s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trai", "ling", "\u2581space"],
+      ids: [0, 14262, 23291, 36833, 2],
+      decoded: "<s> trailing space</s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$1", "\u2581R", "2", "\u2581#", "3", "\u2581\u20ac", "4", "\u2581\u00a3", "5", "\u2581", "\u00a5", "6", "\u2581", "\u20a3", "7", "\u2581", "\u20b9", "8", "\u2581", "\u20b1", "9", "\u2581test"],
+      ids: [0, 35950, 39065, 46982, 35934, 41882, 38167, 33874, 46702, 4467, 50934, 42990, 36748, 55144, 42990, 3, 56620, 42990, 31354, 486, 42990, 3, 3200, 35950, 2],
+      decoded: "<s> test $1 R2 #3 \u20ac4 \u00a35 \u00a56 <unk>7 \u20b98 <unk>9 test</s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581I", "\u2581bought", "\u2581an", "\u2581app", "le", "\u2581for", "\u2581$1", ".", "00", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [0, 53821, 6018, 9971, 39627, 7897, 57245, 39065, 39539, 49351, 56980, 48941, 40747, 39539, 2],
+      decoded: "<s> I bought an apple for $1.00 at the store.</s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581we", "ird", "\u2581", "\uff5e", "\u2581ed", "ge", "\u2581", "\uff5e", "\u2581case"],
+      ids: [0, 47450, 52806, 42990, 46476, 25847, 40548, 42990, 46476, 49911, 2],
+      decoded: "<s> weird \uff5e edge \uff5e case</s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581", "\ud83d\ude02", "\u2581", "\ud83d\udc4d", "\u2581", "\ud83e\udd23", "\u2581", "\ud83d\ude0d", "\u2581", "\ud83d\ude2d", "\u2581", "\ud83c\udf89", "\u2581", "\ud83d\ude4f", "\u2581", "\ud83d\ude0a", "\u2581", "\ud83d\udd25", "\u2581", "\ud83d\ude01", "\u2581", "\ud83d\ude05", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\ude06", "\u2581", "\ud83d\udc4f", "\u2581", "\u2764\ufe0f", "\u2581", "\ud83d\udc9c", "\u2581", "\ud83d\udc9a", "\u2581", "\ud83d\udc97", "\u2581", "\ud83d\udc99", "\u2581", "\ud83d\udda4", "\u2581", "\ud83d\ude0e", "\u2581", "\ud83d\udc4c", "\u2581", "\ud83e\udd73", "\u2581", "\ud83d\udcaa", "\u2581", "\u2728", "\u2581", "\ud83d\udc49", "\u2581", "\ud83d\udc40", "\u2581", "\ud83d\udcaf", "\u2581", "\ud83c\udf88", "\u2581", "\ud83d\ude48", "\u2581", "\ud83d\ude4c", "\u2581", "\ud83d\udc80", "\u2581", "\ud83d\udc47", "\u2581", "\ud83d\udc4b", "\u2581", "\u2705", "\u2581", "\ud83c\udf81", "\u2581", "\ud83c\udf1e", "\u2581", "\ud83c\udf38", "\u2581", "\ud83d\udcb0"],
+      ids: [0, 42990, 3864, 42990, 3, 42990, 28873, 42990, 3, 42990, 3, 42990, 29257, 42990, 3, 42990, 3, 42990, 3, 42990, 22310, 42990, 3, 42990, 29017, 42990, 3, 42990, 28890, 42990, 3, 42990, 29601, 42990, 3, 42990, 29564, 42990, 3, 42990, 3, 42990, 14430, 42990, 3, 42990, 3, 42990, 28292, 42990, 3, 42990, 3, 42990, 30688, 42990, 30146, 42990, 3, 42990, 29798, 42990, 3, 42990, 3, 42990, 3, 42990, 3, 42990, 10257, 42990, 3, 42990, 30687, 42990, 3, 42990, 3, 2],
+      decoded: "<s> \ud83d\ude02 <unk> \ud83e\udd23 <unk> <unk> \ud83c\udf89 <unk> <unk> <unk> \ud83d\ude01 <unk> \ud83e\udd17 <unk> \ud83d\udc4f <unk> \ud83d\udc9c <unk> \ud83d\udc97 <unk> <unk> \ud83d\ude0e <unk> <unk> \ud83d\udcaa <unk> <unk> \ud83d\udc40 \ud83d\udcaf <unk> \ud83d\ude48 <unk> <unk> <unk> <unk> \u2705 <unk> \ud83c\udf1e <unk> <unk></s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "\u2728", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\udc41\ufe0f", "\u2581", "\ud83d\udc71\ud83c\udffb", "\u2581", "\ud83d\udd75", "\u2581", "\u2642", "\ufe0f", "\u2581", "\ud83e\uddd9\ud83c\udffb", "\u2581", "\u2642", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581", "\ud83c\udf3e", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83d\udc69", "\u2581", "\u2764", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc67", "\u2581", "\ud83d\udc66", "\u2581", "\ud83e\uddd1\ud83c\udffb", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1\ud83c\udffb", "\u2581", "\ud83c\udff4\udb40\udc67\udb40\udc62\udb40\udc65\udb40\udc6e\udb40\udc67\udb40\udc7f", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581", "\u2764\ufe0f", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\ud83c\udffc"],
+      ids: [0, 42990, 3, 42990, 29017, 42990, 3, 42990, 3, 42990, 3, 42990, 26803, 3, 42990, 3, 42990, 26803, 42990, 29673, 3, 42990, 3, 42990, 3, 42990, 3, 42990, 3, 42990, 30079, 42990, 3, 42990, 30218, 42990, 29673, 42990, 30079, 42990, 30079, 42990, 30799, 42990, 31120, 42990, 3, 42990, 3, 42990, 3, 42990, 3, 42990, 29673, 3, 42990, 3, 42990, 30218, 42990, 29673, 28396, 2],
+      decoded: "<s> <unk> \ud83e\udd17 <unk> <unk> <unk> \u2642<unk> <unk> \u2642 \ud83d\udc68<unk> <unk> <unk> <unk> <unk> \ud83d\udc69 <unk> \ud83d\udc8b \ud83d\udc68 \ud83d\udc69 \ud83d\udc69 \ud83d\udc67 \ud83d\udc66 <unk> <unk> <unk> <unk> \ud83d\udc68<unk> <unk> \ud83d\udc8b \ud83d\udc68\ud83c\udffc</s>",
+    },
+  },
+  "Xenova/trocr-small-handwritten": {
+    NUMBERS: {
+      text: BASE_TEST_STRINGS.NUMBERS,
+      tokens: ["\u25810", "123", "45", "67", "89", "\u25810", "\u25811", "\u25812", "\u25813", "\u25814", "\u25815", "\u25816", "\u25817", "\u25818", "\u25819", "\u258110", "\u2581100", "\u25811000"],
+      ids: [0, 1596, 31702, 5356, 8248, 7385, 1596, 267, 252, 271, 319, 331, 467, 531, 539, 641, 274, 921, 8401, 2],
+      decoded: "<s> 0123456789 0 1 2 3 4 5 6 7 8 9 10 100 1000</s>",
+    },
+    TEXT_WITH_NUMBERS: {
+      text: BASE_TEST_STRINGS.TEXT_WITH_NUMBERS,
+      tokens: ["\u2581The", "\u2581company", "\u2581was", "\u2581founded", "\u2581in", "\u25812016."],
+      ids: [0, 25, 215, 19, 3027, 12, 2401, 2],
+      decoded: "<s> The company was founded in 2016.</s>",
+    },
+    PUNCTUATION: {
+      text: BASE_TEST_STRINGS.PUNCTUATION,
+      tokens: ["\u2581A", "\u2581'", "ll", "\u2581!", "!", "to", "?'", "d", "'", "'", "d", "\u2581of", ",", "\u2581can", "'", "t", "."],
+      ids: [0, 94, 26, 184, 118, 338, 436, 16368, 109, 27, 27, 109, 9, 6, 64, 27, 31, 5, 2],
+      decoded: "<s> A 'll!!to?'d''d of, can't.</s>",
+    },
+    PYTHON_CODE: {
+      text: BASE_TEST_STRINGS.PYTHON_CODE,
+      tokens: ["\u2581def", "\u2581main", "(", "):", "\u2581pass"],
+      ids: [0, 13114, 830, 2126, 5056, 1080, 2],
+      decoded: "<s> def main(): pass</s>",
+    },
+    JAVASCRIPT_CODE: {
+      text: BASE_TEST_STRINGS.JAVASCRIPT_CODE,
+      tokens: ["\u2581let", "\u2581a", "\u2581=", "\u2581obj", ".", "to", "String", "(", ");", "\u2581to", "String", "(", ");"],
+      ids: [0, 393, 10, 2219, 48394, 5, 436, 34868, 2126, 3671, 7, 34868, 2126, 3671, 2],
+      decoded: "<s> let a = obj.toString(); toString();</s>",
+    },
+    BASIC: {
+      text: BASE_TEST_STRINGS.BASIC,
+      tokens: ["\u2581UN", "wan", "t\u00e9", "d", ",", "running"],
+      ids: [0, 3225, 6327, 12529, 109, 6, 11484, 2],
+      decoded: "<s> UNwant\u00e9d,running</s>",
+    },
+    HELLO_WORLD_LOWERCASE: {
+      text: BASE_TEST_STRINGS.HELLO_WORLD_LOWERCASE,
+      tokens: ["\u2581hello", "\u2581world"],
+      ids: [0, 12773, 218, 2],
+      decoded: "<s> hello world</s>",
+    },
+    CHINESE_ONLY: {
+      text: BASE_TEST_STRINGS.CHINESE_ONLY,
+      tokens: ["\u2581", "\u751f", "\u6d3b", "\u7684", "\u771f", "\u8c1b", "\u662f"],
+      ids: [0, 190, 63145, 3, 62784, 63741, 3, 63010, 2],
+      decoded: "<s> \u751f<unk>\u7684\u771f<unk>\u662f</s>",
+    },
+    TRAILING_SPACE: {
+      text: BASE_TEST_STRINGS.TRAILING_SPACE,
+      tokens: ["\u2581trailing", "\u2581space"],
+      ids: [0, 12250, 769, 2],
+      decoded: "<s> trailing space</s>",
+    },
+    CURRENCY: {
+      text: BASE_TEST_STRINGS.CURRENCY,
+      tokens: ["\u2581test", "\u2581$1", "\u2581R", "2", "\u2581#3", "\u2581\u20ac4", "\u2581\u00a35", "\u2581\u00a5", "6", "\u2581", "\u20a3", "7", "\u2581\u20b9", "8", "\u2581", "\u20b1", "9", "\u2581test"],
+      ids: [0, 1036, 1594, 791, 792, 28537, 46242, 15364, 45731, 1487, 190, 3, 1473, 32176, 1439, 190, 63400, 1428, 1036, 2],
+      decoded: "<s> test $1 R2 #3 \u20ac4 \u00a35 \u00a56 <unk>7 \u20b98 \u20b19 test</s>",
+    },
+    CURRENCY_WITH_DECIMALS: {
+      text: BASE_TEST_STRINGS.CURRENCY_WITH_DECIMALS,
+      tokens: ["\u2581I", "\u2581bought", "\u2581an", "\u2581apple", "\u2581for", "\u2581$1.00", "\u2581at", "\u2581the", "\u2581store", "."],
+      ids: [0, 14, 1355, 46, 8688, 17, 44092, 29, 4, 920, 5, 2],
+      decoded: "<s> I bought an apple for $1.00 at the store.</s>",
+    },
+    TILDE_NORMALIZATION: {
+      text: BASE_TEST_STRINGS.TILDE_NORMALIZATION,
+      tokens: ["\u2581weird", "\u2581", "\uff5e", "\u2581edge", "\u2581", "\uff5e", "\u2581case"],
+      ids: [0, 3392, 190, 3, 2297, 190, 3, 343, 2],
+      decoded: "<s> weird <unk> edge <unk> case</s>",
+    },
+    SPIECE_UNDERSCORE: {
+      text: BASE_TEST_STRINGS.SPIECE_UNDERSCORE,
+      tokens: ["\u2581This", "\u2581is", "\u2581a", "\u2581test", "\u2581."],
+      ids: [0, 127, 18, 10, 1036, 13, 2],
+      decoded: "<s> This is a test.</s>",
+    },
+    POPULAR_EMOJIS: {
+      text: BASE_TEST_STRINGS.POPULAR_EMOJIS,
+      tokens: ["\u2581", "\ud83d\ude02", "\u2581", "\ud83d\udc4d", "\u2581", "\ud83e\udd23", "\u2581", "\ud83d\ude0d", "\u2581", "\ud83d\ude2d", "\u2581", "\ud83c\udf89", "\u2581", "\ud83d\ude4f", "\u2581", "\ud83d\ude0a", "\u2581", "\ud83d\udd25", "\u2581", "\ud83d\ude01", "\u2581", "\ud83d\ude05", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\ude06", "\u2581", "\ud83d\udc4f", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc9c", "\u2581", "\ud83d\udc9a", "\u2581", "\ud83d\udc97", "\u2581", "\ud83d\udc99", "\u2581", "\ud83d\udda4", "\u2581", "\ud83d\ude0e", "\u2581", "\ud83d\udc4c", "\u2581", "\ud83e\udd73", "\u2581", "\ud83d\udcaa", "\u2581", "\u2728", "\u2581", "\ud83d\udc49", "\u2581", "\ud83d\udc40", "\u2581", "\ud83d\udcaf", "\u2581", "\ud83c\udf88", "\u2581", "\ud83d\ude48", "\u2581", "\ud83d\ude4c", "\u2581", "\ud83d\udc80", "\u2581", "\ud83d\udc47", "\u2581", "\ud83d\udc4b", "\u2581", "\u2705", "\u2581", "\ud83c\udf81", "\u2581", "\ud83c\udf1e", "\u2581", "\ud83c\udf38", "\u2581", "\ud83d\udcb0"],
+      ids: [0, 190, 45790, 190, 63194, 190, 63067, 190, 62942, 190, 62920, 190, 63116, 190, 62811, 190, 63283, 190, 62997, 190, 63427, 190, 3, 190, 63849, 190, 63751, 190, 62872, 47278, 21661, 190, 63467, 190, 63570, 190, 63473, 190, 63061, 190, 63640, 190, 63468, 190, 63302, 190, 3, 190, 63064, 190, 62860, 190, 63489, 190, 63057, 190, 63543, 190, 3, 190, 3, 190, 62949, 190, 3, 190, 63847, 190, 3, 190, 63747, 190, 3, 190, 3, 190, 3, 190, 3, 2],
+      decoded: "<s> \ud83d\ude02 \ud83d\udc4d \ud83e\udd23 \ud83d\ude0d \ud83d\ude2d \ud83c\udf89 \ud83d\ude4f \ud83d\ude0a \ud83d\udd25 \ud83d\ude01 <unk> \ud83e\udd17 \ud83d\ude06 \ud83d\udc4f \u2764\ufe0f \ud83d\udc9c \ud83d\udc9a \ud83d\udc97 \ud83d\udc99 \ud83d\udda4 \ud83d\ude0e \ud83d\udc4c <unk> \ud83d\udcaa \u2728 \ud83d\udc49 \ud83d\udc40 \ud83d\udcaf <unk> <unk> \ud83d\ude4c <unk> \ud83d\udc47 <unk> \u2705 <unk> <unk> <unk> <unk></s>",
+    },
+    MULTIBYTE_EMOJIS: {
+      text: BASE_TEST_STRINGS.MULTIBYTE_EMOJIS,
+      tokens: ["\u2581", "\u2728", "\u2581", "\ud83e\udd17", "\u2581", "\ud83d\udc41", "\ufe0f", "\u2581", "\ud83d\udc71", "\ud83c\udffb", "\u2581", "\ud83d\udd75", "\u2581", "\u2642", "\ufe0f", "\u2581", "\ud83e\uddd9", "\ud83c\udffb", "\u2581", "\u2642", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581", "\ud83c\udf3e", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\u2581", "\ud83d\udc69", "\u2581\u2764", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc69", "\u2581", "\ud83d\udc67", "\u2581", "\ud83d\udc66", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83e\udd1d", "\u2581", "\ud83e\uddd1", "\ud83c\udffb", "\u2581", "\ud83c\udff4", "\udb40\udc67", "\udb40\udc62\udb40\udc65\udb40\udc6e", "\udb40\udc67", "\udb40\udc7f", "\u2581", "\ud83d\udc68", "\ud83c\udffb", "\u2581\u2764", "\ufe0f", "\u2581", "\ud83d\udc8b", "\u2581", "\ud83d\udc68", "\ud83c\udffc"],
+      ids: [0, 190, 62860, 190, 63849, 190, 3, 21661, 190, 3, 62863, 190, 3, 190, 63135, 21661, 190, 3, 62863, 190, 63135, 190, 3, 62863, 190, 3, 190, 3, 190, 3, 190, 3, 190, 3, 47278, 190, 63500, 190, 3, 190, 3, 190, 3, 190, 3, 190, 3, 190, 3, 62863, 190, 3, 190, 3, 62863, 190, 3, 63769, 3, 63769, 3, 190, 3, 62863, 47278, 21661, 190, 63500, 190, 3, 62816, 2],
+      decoded: "<s> \u2728 \ud83e\udd17 <unk>\ufe0f <unk>\ud83c\udffb <unk> \u2642\ufe0f <unk>\ud83c\udffb \u2642 <unk>\ud83c\udffb <unk> <unk> <unk> <unk> <unk> \u2764 \ud83d\udc8b <unk> <unk> <unk> <unk> <unk> <unk>\ud83c\udffb <unk> <unk>\ud83c\udffb <unk>\udb40\udc67<unk>\udb40\udc67<unk> <unk>\ud83c\udffb \u2764\ufe0f \ud83d\udc8b <unk>\ud83c\udffc</s>",
+    },
+  },
+};
diff --git a/tests/pipelines.test.js b/tests/pipelines.test.js
index 9c9900cdb..6bef83297 100644
--- a/tests/pipelines.test.js
+++ b/tests/pipelines.test.js
@@ -1,7 +1,6 @@
-
-import { pipeline, cos_sim } from '../src/transformers.js';
-import { init, m, MAX_TEST_EXECUTION_TIME } from './init.js';
-import { compare, loadAudio } from './test_utils.js';
+import { pipeline, cos_sim } from "../src/transformers.js";
+import { init, MAX_TEST_EXECUTION_TIME } from "./init.js";
+import { compare, loadAudio } from "./test_utils.js";
 
 // Initialise the testing environment
 init();
@@ -11,573 +10,550 @@ init();
 // This is due to how model construction and destruction occurs, in `beforeAll` and `afterAll`, respectively.
 // As a result, each test is responsible for exactly one model, but we run multiple inputs through it.
 // By encapsulating model construction and destruction in a single `it` block, we avoid these memory issues.
-describe('Pipelines', () => {
-
-    describe('Text classification', () => {
-
-        // List all models which will be tested
-        const models = [
-            'distilbert-base-uncased-finetuned-sst-2-english',
-            'Xenova/toxic-bert',
-        ];
-
-        // single_label_classification
-        it(models[0], async () => {
-            let classifier = await pipeline('text-classification', m(models[0]));
-            let texts = [
-                "This was a masterpiece. Not completely faithful to the books, but enthralling from beginning to end. Might be my favorite of the three.",
-                "I hated the movie"
-            ];
-
-            // single
-            {
-                let outputs = await classifier("I hated the movie");
-                let expected = [
-                    { "label": "NEGATIVE", "score": 0.9996212720870972 }
-                ];
-                compare(outputs, expected);
-            }
-
-            // single + topk
-            {
-                let outputs = await classifier("I hated the movie", {
-                    topk: 2
-                });
-                let expected = [
-                    { "label": "NEGATIVE", "score": 0.9996212720870972 },
-                    { "label": "POSITIVE", "score": 0.0003787268069572747 }
-                ];
-                compare(outputs, expected);
-            }
-
-            // batched
-            {
-                let outputs = await classifier(texts);
-
-                let expected = [
-                    { "label": "POSITIVE", "score": 0.9993746876716614 },
-                    { "label": "NEGATIVE", "score": 0.9996694326400757 }
-                ];
-
-                compare(outputs, expected);
-            }
-
-
-            // batched + topk
-            {
-                let outputs = await classifier(texts, {
-                    topk: 2
-                });
-
-                let expected = [[
-                    { "label": "POSITIVE", "score": 0.9993746876716614 },
-                    { "label": "NEGATIVE", "score": 0.0006253048195503652 }
-                ], [
-                    { "label": "NEGATIVE", "score": 0.9996694326400757 },
-                    { "label": "POSITIVE", "score": 0.00033057318069040775 }
-                ]];
-
-                compare(outputs, expected);
-            }
-
-
-            await classifier.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-        // multi_label_classification
-        it(models[1], async () => {
-            let classifier = await pipeline('text-classification', m(models[1]));
-            let texts = [
-                "I like you. I love you", // low scores
-                "I hate you." // high scores
-            ];
-
-            // single
-            {
-                let outputs = await classifier(texts);
-                let expected = [
-                    { label: 'toxic', score: 0.0007729064091108739 },
-                    { label: 'toxic', score: 0.9475088119506836 }
-                ]
-                compare(outputs, expected);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
-
-
-    });
-
-    describe('Token classification', () => {
-
-        // List all models which will be tested
-        const models = [
-            'Davlan/bert-base-multilingual-cased-ner-hrl',
-        ];
-
-        it(models[0], async () => {
-            let classifier = await pipeline('token-classification', m(models[0]));
-            let texts = [
-                "The Golden State Warriors are an American professional basketball team based in San Francisco.",
-                "My name is Sarah and I live in London."
-            ];
-
-            // single
-            {
-                let outputs = await classifier(texts[0]);
-
-                let expected = [
-                    { entity: "B-ORG", score: 0.9998535513877869, index: 2, word: "Golden", start: null, end: null },
-                    { entity: "I-ORG", score: 0.9998612999916077, index: 3, word: "State", start: null, end: null },
-                    { entity: "I-ORG", score: 0.999866247177124, index: 4, word: "Warriors", start: null, end: null },
-                    { entity: "B-LOC", score: 0.9997050166130066, index: 13, word: "San", start: null, end: null },
-                    { entity: "I-LOC", score: 0.9987282156944275, index: 14, word: "Francisco", start: null, end: null }
-                ];
-
-                compare(outputs, expected, 0.05);
-
-            }
-
-            // batched
-            {
-                let outputs = await classifier(texts);
-
-                let expected = [
-                    [
-                        { entity: "B-ORG", score: 0.9998375773429871, index: 2, word: "Golden", start: null, end: null },
-                        { entity: "I-ORG", score: 0.9998642206192017, index: 3, word: "State", start: null, end: null },
-                        { entity: "I-ORG", score: 0.9998642802238464, index: 4, word: "Warriors", start: null, end: null },
-                        { entity: "B-LOC", score: 0.9996914863586426, index: 13, word: "San", start: null, end: null },
-                        { entity: "I-LOC", score: 0.9989780783653259, index: 14, word: "Francisco", start: null, end: null }
-                    ], [
-                        { entity: "B-PER", score: 0.997977614402771, index: 4, word: "Sarah", start: null, end: null },
-                        { entity: "B-LOC", score: 0.9996902346611023, index: 9, word: "London", start: null, end: null }
-                    ]
-                ];
-
-                compare(outputs, expected, 0.05);
-            }
-
-            await classifier.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Zero-shot classification', () => {
-
-        // List all models which will be tested
-        const models = [
-            'facebook/bart-large-mnli',
-        ];
-
-        it(models[0], async () => {
-            let classifier = await pipeline('zero-shot-classification', m(models[0]));
-
-            let sequences_to_classify = ['one day I will see the world', 'I love making pizza'];
-            let candidate_labels = ['travel', 'cooking', 'dancing'];
-
-            // single
-            {
-                let outputs = await classifier(sequences_to_classify[0], candidate_labels);
-                let expected = {
-                    sequence: "one day I will see the world",
-                    labels: ["travel", "dancing", "cooking"],
-                    scores: [0.4261703487477968, 0.2903585771517135, 0.28347107410048983]
-                }
-
-                compare(outputs, expected, 0.2);
-
-            }
-
-            // batched
-            {
-                let outputs = await classifier(sequences_to_classify, candidate_labels);
-                let expected = [{
-                    sequence: "one day I will see the world",
-                    labels: ["travel", "dancing", "cooking"],
-                    scores: [0.4261703487477968, 0.2903585771517135, 0.28347107410048983]
-                }, {
-                    sequence: "I love making pizza",
-                    labels: ["cooking", "travel", "dancing"],
-                    scores: [0.4660367922118968, 0.2756005926506238, 0.2583626151374795]
-                }];
-
-                compare(outputs, expected, 0.2);
-
-            }
-
-
-            // batched + multilabel
-            {
-                let outputs = await classifier(sequences_to_classify, candidate_labels, {
-                    multi_label: true
-                })
-                let expected = [{
-                    sequence: "one day I will see the world",
-                    labels: ["travel", "dancing", "cooking"],
-                    scores: [0.7108286792234982, 0.5763787804099745, 0.44303326070949994]
-                }, {
-                    sequence: "I love making pizza",
-                    labels: ["cooking", "travel", "dancing"],
-                    scores: [0.8527619536354446, 0.7899589317978243, 0.5838912691496106]
-                }];
-
-                compare(outputs, expected);
-
-            }
-
-            await classifier.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Masked language modelling', () => {
-
-        // List all models which will be tested
-        const models = [
-            'bert-base-uncased',
-        ];
-
-        it(models[0], async () => {
-            let unmasker = await pipeline('fill-mask', m(models[0]));
-            let texts = [
-                "Once upon a [MASK].",
-                "[MASK] is the capital of England."
-            ];
-
-            // single
-            {
-                let outputs = await unmasker(texts[0]);
-                let expected = [
-                    {
-                        score: 0.9405396580696106,
-                        token: 2051,
-                        token_str: 'time',
-                        sequence: 'once upon a time.'
-                    },
-                    {
-                        score: 0.01182964164763689,
-                        token: 13342,
-                        token_str: 'mattress',
-                        sequence: 'once upon a mattress.'
-                    },
-                    {
-                        score: 0.0017291896510869265,
-                        token: 6480,
-                        token_str: 'lifetime',
-                        sequence: 'once upon a lifetime.'
-                    },
-                    {
-                        score: 0.0010079898638650775,
-                        token: 2504,
-                        token_str: 'level',
-                        sequence: 'once upon a level.'
-                    },
-                    {
-                        score: 0.0009655007743276656,
-                        token: 2154,
-                        token_str: 'day',
-                        sequence: 'once upon a day.'
-                    }
-                ];
-                compare(outputs, expected);
-
-            }
-
-
-            // batched
-            {
-                let outputs = await unmasker(texts);
-
-                let expected = [[
-                    {
-                        score: 0.9900539517402649,
-                        token: 2051,
-                        token_str: 'time',
-                        sequence: 'once upon a time.'
-                    },
-                    {
-                        score: 0.0012258145725354552,
-                        token: 13342,
-                        token_str: 'mattress',
-                        sequence: 'once upon a mattress.'
-                    },
-                    {
-                        score: 0.0002977887343149632,
-                        token: 2096,
-                        token_str: 'while',
-                        sequence: 'once upon a while.'
-                    },
-                    {
-                        score: 0.0001899998023873195,
-                        token: 6480,
-                        token_str: 'lifetime',
-                        sequence: 'once upon a lifetime.'
-                    },
-                    {
-                        score: 0.00017618606216274202,
-                        token: 2558,
-                        token_str: 'period',
-                        sequence: 'once upon a period.'
-                    }
-                ],
-                [
-                    {
-                        score: 0.2863538861274719,
-                        token: 2414,
-                        token_str: 'london',
-                        sequence: 'london is the capital of england.'
-                    },
-                    {
-                        score: 0.0607745461165905,
-                        token: 2009,
-                        token_str: 'it',
-                        sequence: 'it is the capital of england.'
-                    },
-                    {
-                        score: 0.037455108016729355,
-                        token: 6484,
-                        token_str: 'birmingham',
-                        sequence: 'birmingham is the capital of england.'
-                    },
-                    {
-                        score: 0.029375044628977776,
-                        token: 5087,
-                        token_str: 'manchester',
-                        sequence: 'manchester is the capital of england.'
-                    },
-                    {
-                        score: 0.0292277242988348,
-                        token: 7067,
-                        token_str: 'bristol',
-                        sequence: 'bristol is the capital of england.'
-                    }
-                ]];
-
-                compare(outputs, expected);
-
-            }
-
-            await unmasker.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Question answering', () => {
-        let question = 'Who was Jim Henson?'
-        let context = 'Jim Henson was a nice puppet.'
-
-
-        // List all models which will be tested
-        const models = [
-            'distilbert-base-uncased-distilled-squad',
+xdescribe("Pipelines", () => {
+  describe("Text classification", () => {
+    // List all models which will be tested
+    const models = ["Xenova/distilbert-base-uncased-finetuned-sst-2-english", "Xenova/toxic-bert"];
+
+    // single_label_classification
+    it(
+      models[0],
+      async () => {
+        let classifier = await pipeline("text-classification", models[0]);
+        let texts = ["This was a masterpiece. Not completely faithful to the books, but enthralling from beginning to end. Might be my favorite of the three.", "I hated the movie"];
+
+        // single
+        {
+          let outputs = await classifier("I hated the movie");
+          let expected = [{ label: "NEGATIVE", score: 0.9996212720870972 }];
+          compare(outputs, expected);
+        }
+
+        // single + topk
+        {
+          let outputs = await classifier("I hated the movie", {
+            topk: 2,
+          });
+          let expected = [
+            { label: "NEGATIVE", score: 0.9996212720870972 },
+            { label: "POSITIVE", score: 0.0003787268069572747 },
+          ];
+          compare(outputs, expected);
+        }
+
+        // batched
+        {
+          let outputs = await classifier(texts);
+
+          let expected = [
+            { label: "POSITIVE", score: 0.9993746876716614 },
+            { label: "NEGATIVE", score: 0.9996694326400757 },
+          ];
+
+          compare(outputs, expected);
+        }
+
+        // batched + topk
+        {
+          let outputs = await classifier(texts, {
+            topk: 2,
+          });
+
+          let expected = [
+            [
+              { label: "POSITIVE", score: 0.9993746876716614 },
+              { label: "NEGATIVE", score: 0.0006253048195503652 },
+            ],
+            [
+              { label: "NEGATIVE", score: 0.9996694326400757 },
+              { label: "POSITIVE", score: 0.00033057318069040775 },
+            ],
+          ];
+
+          compare(outputs, expected);
+        }
+
+        await classifier.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    // multi_label_classification
+    it(
+      models[1],
+      async () => {
+        let classifier = await pipeline("text-classification", models[1]);
+        let texts = [
+          "I like you. I love you", // low scores
+          "I hate you.", // high scores
         ];
 
-        it(models[0], async () => {
-            let answerer = await pipeline('question-answering', m(models[0]));
-
-            // single
+        // single
+        {
+          let outputs = await classifier(texts);
+          let expected = [
+            { label: "toxic", score: 0.0007729064091108739 },
+            { label: "toxic", score: 0.9475088119506836 },
+          ];
+          compare(outputs, expected);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Token classification", () => {
+    // List all models which will be tested
+    const models = ["Xenova/bert-base-multilingual-cased-ner-hrl"];
+
+    it(
+      models[0],
+      async () => {
+        let classifier = await pipeline("token-classification", models[0]);
+        let texts = ["The Golden State Warriors are an American professional basketball team based in San Francisco.", "My name is Sarah and I live in London."];
+
+        // single
+        {
+          let outputs = await classifier(texts[0]);
+
+          let expected = [
+            { entity: "B-ORG", score: 0.9998535513877869, index: 2, word: "Golden", start: null, end: null },
+            { entity: "I-ORG", score: 0.9998612999916077, index: 3, word: "State", start: null, end: null },
+            { entity: "I-ORG", score: 0.999866247177124, index: 4, word: "Warriors", start: null, end: null },
+            { entity: "B-LOC", score: 0.9997050166130066, index: 13, word: "San", start: null, end: null },
+            { entity: "I-LOC", score: 0.9987282156944275, index: 14, word: "Francisco", start: null, end: null },
+          ];
+
+          compare(outputs, expected, 0.05);
+        }
+
+        // batched
+        {
+          let outputs = await classifier(texts);
+
+          let expected = [
+            [
+              { entity: "B-ORG", score: 0.9998375773429871, index: 2, word: "Golden", start: null, end: null },
+              { entity: "I-ORG", score: 0.9998642206192017, index: 3, word: "State", start: null, end: null },
+              { entity: "I-ORG", score: 0.9998642802238464, index: 4, word: "Warriors", start: null, end: null },
+              { entity: "B-LOC", score: 0.9996914863586426, index: 13, word: "San", start: null, end: null },
+              { entity: "I-LOC", score: 0.9989780783653259, index: 14, word: "Francisco", start: null, end: null },
+            ],
+            [
+              { entity: "B-PER", score: 0.997977614402771, index: 4, word: "Sarah", start: null, end: null },
+              { entity: "B-LOC", score: 0.9996902346611023, index: 9, word: "London", start: null, end: null },
+            ],
+          ];
+
+          compare(outputs, expected, 0.05);
+        }
+
+        await classifier.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Zero-shot classification", () => {
+    // List all models which will be tested
+    const models = ["Xenova/bart-large-mnli"];
+
+    it(
+      models[0],
+      async () => {
+        let classifier = await pipeline("zero-shot-classification", models[0]);
+
+        let sequences_to_classify = ["one day I will see the world", "I love making pizza"];
+        let candidate_labels = ["travel", "cooking", "dancing"];
+
+        // single
+        {
+          let outputs = await classifier(sequences_to_classify[0], candidate_labels);
+          let expected = {
+            sequence: "one day I will see the world",
+            labels: ["travel", "dancing", "cooking"],
+            scores: [0.4261703487477968, 0.2903585771517135, 0.28347107410048983],
+          };
+
+          compare(outputs, expected, 0.2);
+        }
+
+        // batched
+        {
+          let outputs = await classifier(sequences_to_classify, candidate_labels);
+          let expected = [
             {
-                let outputs = await answerer(question, context);
-                let expected = { answer: 'a nice puppet', score: 0.5664517526948352 };
-
-                compare(outputs, expected, 0.2);
-            }
-
-            // single + topk
+              sequence: "one day I will see the world",
+              labels: ["travel", "dancing", "cooking"],
+              scores: [0.4261703487477968, 0.2903585771517135, 0.28347107410048983],
+            },
             {
-                let outputs = await answerer(question, context, {
-                    topk: 3,
-                });
-                let expected = [
-                    { answer: 'a nice puppet', score: 0.5664517526948352 },
-                    { answer: 'nice puppet', score: 0.1698902336448853 },
-                    { answer: 'puppet', score: 0.14046057793125577 }
-                ];
-
-                compare(outputs, expected, 0.2);
-
-            }
-            await answerer.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Summarization', () => {
-
-        // List all models which will be tested
-        const models = [
-            'sshleifer/distilbart-cnn-6-6',
-            'facebook/bart-large-cnn',
-        ];
-
-        let texts = [
-            `The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.`,
-            `The Amazon rainforest (Portuguese: Floresta Amazônica or Amazônia; Spanish: Selva Amazónica, Amazonía or usually Amazonia; French: Forêt amazonienne; Dutch: Amazoneregenwoud), also known in English as Amazonia or the Amazon Jungle, is a moist broadleaf forest that covers most of the Amazon basin of South America. This basin encompasses 7,000,000 square kilometres (2,700,000 sq mi), of which 5,500,000 square kilometres (2,100,000 sq mi) are covered by the rainforest. This region includes territory belonging to nine nations. The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, Colombia with 10%, and with minor amounts in Venezuela, Ecuador, Bolivia, Guyana, Suriname and French Guiana. States or departments in four nations contain "Amazonas" in their names. The Amazon represents over half of the planet's remaining rainforests, and comprises the largest and most biodiverse tract of tropical rainforest in the world, with an estimated 390 billion individual trees divided into 16,000 species.`
-        ];
-
-        it(models[0], async () => {
-            let summarizer = await pipeline('summarization', m(models[0]));
-
-            // batched
+              sequence: "I love making pizza",
+              labels: ["cooking", "travel", "dancing"],
+              scores: [0.4660367922118968, 0.2756005926506238, 0.2583626151374795],
+            },
+          ];
+
+          compare(outputs, expected, 0.2);
+        }
+
+        // batched + multilabel
+        {
+          let outputs = await classifier(sequences_to_classify, candidate_labels, {
+            multi_label: true,
+          });
+          let expected = [
             {
-                let summary = await summarizer(texts, {
-                    top_k: 0,
-                    do_sample: false,
-                });
-                expect(summary).toHaveLength(2);
-                expect(summary[0].summary_text.length).toBeGreaterThan(50);
-                expect(summary[1].summary_text.length).toBeGreaterThan(50);
-            }
-            await summarizer.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-
-        it(models[1], async () => {
-            let summarizer = await pipeline('summarization', m(models[1]));
-
-            // batched + `forced_bos_token_id`
+              sequence: "one day I will see the world",
+              labels: ["travel", "dancing", "cooking"],
+              scores: [0.7108286792234982, 0.5763787804099745, 0.44303326070949994],
+            },
             {
-                let summary = await summarizer(texts[0], {
-                    top_k: 0,
-                    do_sample: false,
-                });
-                expect(summary).toHaveLength(1);
-                expect(summary[0].summary_text.length).toBeGreaterThan(50);
-            }
-
-            await summarizer.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Translation', () => {
-
-        // List all models which will be tested
-        const models = [
-            't5-small',
-
-            // Multilingual model
-            'facebook/nllb-200-distilled-600M',
-        ];
-
-        it(models[0], async () => {
-            let translator = await pipeline('translation_en_to_de', m(models[0]));
-            let texts = [
-                'Hello, how are you?',
-                'My name is Maria.',
-            ]
-
-            // single
+              sequence: "I love making pizza",
+              labels: ["cooking", "travel", "dancing"],
+              scores: [0.8527619536354446, 0.7899589317978243, 0.5838912691496106],
+            },
+          ];
+
+          compare(outputs, expected);
+        }
+
+        await classifier.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Masked language modelling", () => {
+    // List all models which will be tested
+    const models = ["Xenova/bert-base-uncased"];
+
+    it(
+      models[0],
+      async () => {
+        let unmasker = await pipeline("fill-mask", models[0]);
+        let texts = ["Once upon a [MASK].", "[MASK] is the capital of England."];
+
+        // single
+        {
+          let outputs = await unmasker(texts[0]);
+          let expected = [
             {
-                let translation = await translator(texts[0], {
-                    top_k: 0,
-                    do_sample: false
-                });
-
-                let expected = [
-                    { "translation_text": "Hallo, wie sind Sie?" }
-                ];
-
-                compare(translation, expected);
-            }
-
-            // batched
+              score: 0.9405396580696106,
+              token: 2051,
+              token_str: "time",
+              sequence: "once upon a time.",
+            },
             {
-                let output = await translator(texts, {
-                    top_k: 0,
-                    do_sample: false
-                });
-
-                let expected = [
-                    { 'translation_text': 'Hallo, wie sind Sie?' },
-                    { 'translation_text': 'Mein Name ist Maria.' }
-                ];
-
-                compare(output, expected);
-
-            }
-
-            await translator.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-
-
-        it(models[1], async () => {
-            let translator = await pipeline('translation', m(models[1]));
-            let texts = [
-                'Hello world!',
-                'I like to walk my dog.',
-            ]
-
-            // single
+              score: 0.01182964164763689,
+              token: 13342,
+              token_str: "mattress",
+              sequence: "once upon a mattress.",
+            },
             {
-                let translation = await translator(texts[0], {
-                    src_lang: 'eng_Latn',
-                    tgt_lang: 'arb_Arab'
-                });
-
-                let expected = [
-                    { 'translation_text': 'مرحباً، يا عالم!' }
-                ];
-
-                compare(translation, expected);
-            };
-
-            // single + back-translation
+              score: 0.0017291896510869265,
+              token: 6480,
+              token_str: "lifetime",
+              sequence: "once upon a lifetime.",
+            },
             {
-                let translation1 = await translator(texts[1], {
-                    // src_lang: 'eng_Latn',
-                    tgt_lang: 'ell_Grek'
-                });
-                let translation2 = await translator(translation1[0].translation_text, {
-                    src_lang: 'ell_Grek',
-                    tgt_lang: 'eng_Latn'
-                });
-
-                let expected = [
-                    { translation_text: 'Μου αρέσει να περπατάω το σκυλί μου.' }
-                ]
-
-                compare(translation1, expected);
-
-                let expectedBack = [
-                    { translation_text: texts[1] }
-                ]
-                compare(translation2, expectedBack);
-
-            }
-
-            await translator.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Text-to-text generation', () => {
-
-        // List all models which will be tested
-        const models = [
-            'google/flan-t5-small',
-            'google/flan-t5-base',
-        ];
-
-        it(models[0], async () => {
-            let generator = await pipeline('text2text-generation', m(models[0]));
-            let text = "Premise:  At my age you will probably have learnt one lesson. " +
-                "Hypothesis:  It's not certain how many lessons you'll learn by your thirties. " +
-                "Does the premise entail the hypothesis?";
-
+              score: 0.0010079898638650775,
+              token: 2504,
+              token_str: "level",
+              sequence: "once upon a level.",
+            },
             {
-                let outputs = await generator(text, {
-                    top_k: 0,
-                    do_sample: false
-                });
-                expect(outputs).toHaveLength(1);
-                expect(outputs[0].generated_text.length).toBeGreaterThan(1);
-            }
-
-            await generator.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it(models[1], async () => {
-            let generator = await pipeline('text2text-generation', m(models[1]));
-            let text = `
+              score: 0.0009655007743276656,
+              token: 2154,
+              token_str: "day",
+              sequence: "once upon a day.",
+            },
+          ];
+          compare(outputs, expected);
+        }
+
+        // batched
+        {
+          let outputs = await unmasker(texts);
+
+          let expected = [
+            [
+              {
+                score: 0.9900539517402649,
+                token: 2051,
+                token_str: "time",
+                sequence: "once upon a time.",
+              },
+              {
+                score: 0.0012258145725354552,
+                token: 13342,
+                token_str: "mattress",
+                sequence: "once upon a mattress.",
+              },
+              {
+                score: 0.0002977887343149632,
+                token: 2096,
+                token_str: "while",
+                sequence: "once upon a while.",
+              },
+              {
+                score: 0.0001899998023873195,
+                token: 6480,
+                token_str: "lifetime",
+                sequence: "once upon a lifetime.",
+              },
+              {
+                score: 0.00017618606216274202,
+                token: 2558,
+                token_str: "period",
+                sequence: "once upon a period.",
+              },
+            ],
+            [
+              {
+                score: 0.2863538861274719,
+                token: 2414,
+                token_str: "london",
+                sequence: "london is the capital of england.",
+              },
+              {
+                score: 0.0607745461165905,
+                token: 2009,
+                token_str: "it",
+                sequence: "it is the capital of england.",
+              },
+              {
+                score: 0.037455108016729355,
+                token: 6484,
+                token_str: "birmingham",
+                sequence: "birmingham is the capital of england.",
+              },
+              {
+                score: 0.029375044628977776,
+                token: 5087,
+                token_str: "manchester",
+                sequence: "manchester is the capital of england.",
+              },
+              {
+                score: 0.0292277242988348,
+                token: 7067,
+                token_str: "bristol",
+                sequence: "bristol is the capital of england.",
+              },
+            ],
+          ];
+
+          compare(outputs, expected);
+        }
+
+        await unmasker.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Question answering", () => {
+    let question = "Who was Jim Henson?";
+    let context = "Jim Henson was a nice puppet.";
+
+    // List all models which will be tested
+    const models = ["Xenova/distilbert-base-uncased-distilled-squad"];
+
+    it(
+      models[0],
+      async () => {
+        let answerer = await pipeline("question-answering", models[0]);
+
+        // single
+        {
+          let outputs = await answerer(question, context);
+          let expected = { answer: "a nice puppet", score: 0.5664517526948352 };
+
+          compare(outputs, expected, 0.2);
+        }
+
+        // single + topk
+        {
+          let outputs = await answerer(question, context, {
+            topk: 3,
+          });
+          let expected = [
+            { answer: "a nice puppet", score: 0.5664517526948352 },
+            { answer: "nice puppet", score: 0.1698902336448853 },
+            { answer: "puppet", score: 0.14046057793125577 },
+          ];
+
+          compare(outputs, expected, 0.2);
+        }
+        await answerer.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Summarization", () => {
+    // List all models which will be tested
+    const models = ["Xenova/distilbart-cnn-6-6", "Xenova/bart-large-cnn"];
+
+    let texts = [`The tower is 324 metres (1,063 ft) tall, about the same height as an 81-storey building, and the tallest structure in Paris. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. It was the first structure to reach a height of 300 metres. Due to the addition of a broadcasting aerial at the top of the tower in 1957, it is now taller than the Chrysler Building by 5.2 metres (17 ft). Excluding transmitters, the Eiffel Tower is the second tallest free-standing structure in France after the Millau Viaduct.`, `The Amazon rainforest (Portuguese: Floresta Amazônica or Amazônia; Spanish: Selva Amazónica, Amazonía or usually Amazonia; French: Forêt amazonienne; Dutch: Amazoneregenwoud), also known in English as Amazonia or the Amazon Jungle, is a moist broadleaf forest that covers most of the Amazon basin of South America. This basin encompasses 7,000,000 square kilometres (2,700,000 sq mi), of which 5,500,000 square kilometres (2,100,000 sq mi) are covered by the rainforest. This region includes territory belonging to nine nations. The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, Colombia with 10%, and with minor amounts in Venezuela, Ecuador, Bolivia, Guyana, Suriname and French Guiana. States or departments in four nations contain "Amazonas" in their names. The Amazon represents over half of the planet's remaining rainforests, and comprises the largest and most biodiverse tract of tropical rainforest in the world, with an estimated 390 billion individual trees divided into 16,000 species.`];
+
+    it(
+      models[0],
+      async () => {
+        let summarizer = await pipeline("summarization", models[0]);
+
+        // batched
+        {
+          let summary = await summarizer(texts, {
+            top_k: 0,
+            do_sample: false,
+          });
+          expect(summary).toHaveLength(2);
+          expect(summary[0].summary_text.length).toBeGreaterThan(50);
+          expect(summary[1].summary_text.length).toBeGreaterThan(50);
+        }
+        await summarizer.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      models[1],
+      async () => {
+        let summarizer = await pipeline("summarization", models[1]);
+
+        // batched + `forced_bos_token_id`
+        {
+          let summary = await summarizer(texts[0], {
+            top_k: 0,
+            do_sample: false,
+          });
+          expect(summary).toHaveLength(1);
+          expect(summary[0].summary_text.length).toBeGreaterThan(50);
+        }
+
+        await summarizer.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Translation", () => {
+    // List all models which will be tested
+    const models = [
+      "Xenova/t5-small",
+
+      // Multilingual model
+      "Xenova/nllb-200-distilled-600M",
+    ];
+
+    it(
+      models[0],
+      async () => {
+        let translator = await pipeline("translation_en_to_de", models[0]);
+        let texts = ["Hello, how are you?", "My name is Maria."];
+
+        // single
+        {
+          let translation = await translator(texts[0], {
+            top_k: 0,
+            do_sample: false,
+          });
+
+          let expected = [{ translation_text: "Hallo, wie sind Sie?" }];
+
+          compare(translation, expected);
+        }
+
+        // batched
+        {
+          let output = await translator(texts, {
+            top_k: 0,
+            do_sample: false,
+          });
+
+          let expected = [{ translation_text: "Hallo, wie sind Sie?" }, { translation_text: "Mein Name ist Maria." }];
+
+          compare(output, expected);
+        }
+
+        await translator.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      models[1],
+      async () => {
+        let translator = await pipeline("translation", models[1]);
+        let texts = ["Hello world!", "I like to walk my dog."];
+
+        // single
+        {
+          let translation = await translator(texts[0], {
+            src_lang: "eng_Latn",
+            tgt_lang: "arb_Arab",
+          });
+
+          let expected = [{ translation_text: "مرحباً، يا عالم!" }];
+
+          compare(translation, expected);
+        }
+
+        // single + back-translation
+        {
+          let translation1 = await translator(texts[1], {
+            // src_lang: 'eng_Latn',
+            tgt_lang: "ell_Grek",
+          });
+          let translation2 = await translator(translation1[0].translation_text, {
+            src_lang: "ell_Grek",
+            tgt_lang: "eng_Latn",
+          });
+
+          let expected = [{ translation_text: "Μου αρέσει να περπατάω το σκυλί μου." }];
+
+          compare(translation1, expected);
+
+          let expectedBack = [{ translation_text: texts[1] }];
+          compare(translation2, expectedBack);
+        }
+
+        await translator.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Text-to-text generation", () => {
+    // List all models which will be tested
+    const models = ["Xenova/flan-t5-small", "Xenova/flan-t5-base"];
+
+    it(
+      models[0],
+      async () => {
+        let generator = await pipeline("text2text-generation", models[0]);
+        let text = "Premise:  At my age you will probably have learnt one lesson. " + "Hypothesis:  It's not certain how many lessons you'll learn by your thirties. " + "Does the premise entail the hypothesis?";
+
+        {
+          let outputs = await generator(text, {
+            top_k: 0,
+            do_sample: false,
+          });
+          expect(outputs).toHaveLength(1);
+          expect(outputs[0].generated_text.length).toBeGreaterThan(1);
+        }
+
+        await generator.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      models[1],
+      async () => {
+        let generator = await pipeline("text2text-generation", models[1]);
+        let text = `
             Q: Roger has 5 tennis balls. He buys 2 more cans of tennis balls. Each can
             has 3 tennis balls. How many tennis balls does he have now?
             A: Roger started with 5 balls. 2 cans of 3 tennis balls each is 6 tennis balls.
@@ -586,1030 +562,1023 @@ describe('Pipelines', () => {
             Q: A juggler can juggle 16 balls. Half of the balls are golf balls, and half
             of the golf balls are blue. How many blue golf balls are there?`;
 
-            // single
-            {
-                let outputs = await generator(text, {
-                    top_k: 0,
-                    do_sample: false
-                });
-                expect(outputs).toHaveLength(1);
-                expect(outputs[0].generated_text.length).toBeGreaterThan(10);
+        // single
+        {
+          let outputs = await generator(text, {
+            top_k: 0,
+            do_sample: false,
+          });
+          expect(outputs).toHaveLength(1);
+          expect(outputs[0].generated_text.length).toBeGreaterThan(10);
+        }
+        await generator.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Text generation", () => {
+    // List all models which will be tested
+    const models = ["Xenova/distilgpt2", "Xenova/codegen-350M-mono"];
+
+    it(
+      models[0],
+      async () => {
+        let generator = await pipeline("text-generation", models[0]);
+        let texts = ["Once upon a time, there was a", "I enjoy walking with my cute dog"];
+
+        // single
+        {
+          let output = await generator(texts[0], {
+            max_new_tokens: 10,
+            top_k: 0,
+            do_sample: false,
+          });
+          expect(output).toHaveLength(1);
+          expect(output[0].generated_text.length).toBeGreaterThan(texts[0].length);
+        }
+
+        // single + `num_beams` + `num_return_sequences`
+        {
+          let output = await generator(texts[0], {
+            max_new_tokens: 10,
+            num_beams: 2,
+            num_return_sequences: 2,
+            top_k: 0,
+            do_sample: false,
+          });
+          expect(output).toHaveLength(2);
+          expect(output[0].generated_text.length).toBeGreaterThan(texts[0].length);
+          expect(output[1].generated_text.length).toBeGreaterThan(texts[0].length);
+        }
+
+        // batched + `num_beams` + `num_return_sequences`
+        {
+          let output = await generator(texts, {
+            max_new_tokens: 10,
+            num_beams: 2,
+            num_return_sequences: 2,
+            top_k: 0,
+            do_sample: false,
+          });
+          expect(output).toHaveLength(2);
+          expect(output[0]).toHaveLength(2);
+          expect(output[0][0].generated_text.length).toBeGreaterThan(texts[0].length);
+          expect(output[0][1].generated_text.length).toBeGreaterThan(texts[0].length);
+          expect(output[1]).toHaveLength(2);
+          expect(output[1][0].generated_text.length).toBeGreaterThan(texts[1].length);
+          expect(output[1][1].generated_text.length).toBeGreaterThan(texts[1].length);
+        }
+
+        await generator.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      models[1],
+      async () => {
+        let generator = await pipeline("text-generation", models[1]);
+        let code = "def fib(n):";
+
+        // single + `added_tokens`
+        {
+          let output = await generator(code, {
+            max_new_tokens: 45,
+            top_k: 0,
+            do_sample: false,
+          });
+          expect(output).toHaveLength(1);
+          expect(output[0].generated_text.length).toBeGreaterThan(code.length);
+        }
+        await generator.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Feature extraction", () => {
+    // List all models which will be tested
+    const models = ["Xenova/all-MiniLM-L6-v2"];
+
+    it(
+      models[0],
+      async () => {
+        let extractor = await pipeline("feature-extraction", models[0]);
+
+        // Provide sentences
+        let sentences = ["This framework generates embeddings for each input sentence", "Sentences are passed as a list of string.", "The quick brown fox jumps over the lazy dog."];
+
+        // Without pooling or normalization
+        {
+          let output = await extractor(sentences);
+          expect(output.dims).toHaveLength(3);
+        }
+
+        // With pooling and normalization + compare features
+        {
+          let output = await extractor(sentences, { pooling: "mean", normalize: true });
+          expect(output.dims).toHaveLength(2);
+
+          // Convert Tensor to JS list
+          output = output.tolist();
+
+          let pairwiseScores = [
+            [output[0], output[1]],
+            [output[0], output[2]],
+            [output[1], output[2]],
+          ].map((x) => cos_sim(...x));
+
+          let expected = [0.502872309810269, 0.11088411026413121, 0.09602621986931259];
+          compare(pairwiseScores, expected);
+        }
+        await extractor.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Speech-to-text generation", () => {
+    // List all models which will be tested
+    const models = [
+      // whisper
+      "Xenova/whisper-tiny.en", // English-only
+      "Xenova/whisper-small", // Multilingual
+      ["Xenova/whisper-tiny.en", "output_attentions"], // English-only + `output_attentions`
+      ["Xenova/whisper-small", "output_attentions"], // Multilingual + `output_attentions`
+
+      // wav2vec2
+      "jonatasgrosman/wav2vec2-large-xlsr-53-english",
+    ];
+
+    it(
+      models[0],
+      async () => {
+        let transcriber = await pipeline("automatic-speech-recognition", models[0]);
+
+        let url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav";
+        let audioData = await loadAudio(url);
+
+        {
+          // Transcribe English
+          let output = await transcriber(audioData);
+          expect(output.text.length).toBeGreaterThan(50);
+          // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." }
+        }
+
+        {
+          // Transcribe English w/ timestamps.
+          let output = await transcriber(audioData, { return_timestamps: true });
+          expect(output.text.length).toBeGreaterThan(50);
+          expect(output.chunks.length).toBeGreaterThan(0);
+          // {
+          //   text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country."
+          //   chunks: [
+          //     { timestamp: [0, 8],  text: " And so my fellow Americans ask not what your country can do for you" }
+          //     { timestamp: [8, 11], text: " ask what you can do for your country." }
+          //   ]
+          // }
+        }
+        await transcriber.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      models[1],
+      async () => {
+        let transcriber = await pipeline("automatic-speech-recognition", models[1]);
+
+        let url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/french-audio.wav";
+        let audioData = await loadAudio(url);
+
+        {
+          // Transcribe French
+          let output = await transcriber(audioData, { language: "french", task: "transcribe" });
+          expect(output.text.length).toBeGreaterThan(20);
+          // { text: " J'adore, j'aime, je n'aime pas, je déteste." }
+        }
+
+        {
+          // Translate French to English.
+          let output = await transcriber(audioData, { language: "french", task: "translate" });
+          expect(output.text.length).toBeGreaterThan(20);
+          // { text: " I love, I like, I don't like, I hate." }
+        }
+        await transcriber.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      models[2].join(" + "),
+      async () => {
+        let transcriber = await pipeline("automatic-speech-recognition", m(models[2][0]), {
+          revision: models[2][1],
+          quantized: false,
+        });
+
+        let url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav";
+        let audioData = await loadAudio(url);
+
+        {
+          // Transcribe English w/ word-level timestamps.
+          let output = await transcriber(audioData, { return_timestamps: "word" });
+          const target = {
+            text: " And so my fellow Americans ask not what your country can do for you ask what you can do for your country.",
+            chunks: [
+              { text: " And", timestamp: [0, 0.78] },
+              { text: " so", timestamp: [0.78, 1.06] },
+              { text: " my", timestamp: [1.06, 1.46] },
+              { text: " fellow", timestamp: [1.46, 1.76] },
+              { text: " Americans", timestamp: [1.76, 2.22] },
+              { text: " ask", timestamp: [2.22, 3.88] },
+              { text: " not", timestamp: [3.88, 4.52] },
+              { text: " what", timestamp: [4.52, 5.68] },
+              { text: " your", timestamp: [5.68, 6] },
+              { text: " country", timestamp: [6, 6.36] },
+              { text: " can", timestamp: [6.36, 6.76] },
+              { text: " do", timestamp: [6.76, 7.02] },
+              { text: " for", timestamp: [7.02, 7.24] },
+              { text: " you", timestamp: [7.24, 8.02] },
+              { text: " ask", timestamp: [8.28, 8.66] },
+              { text: " what", timestamp: [8.66, 8.94] },
+              { text: " you", timestamp: [8.94, 9.28] },
+              { text: " can", timestamp: [9.28, 9.5] },
+              { text: " do", timestamp: [9.5, 9.72] },
+              { text: " for", timestamp: [9.72, 9.92] },
+              { text: " your", timestamp: [9.92, 10.22] },
+              { text: " country.", timestamp: [10.22, 13.36] },
+            ],
+          };
+
+          compare(output, target);
+        }
+
+        await transcriber.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      models[3].join(" + "),
+      async () => {
+        let transcriber = await pipeline("automatic-speech-recognition", m(models[3][0]), {
+          revision: models[3][1],
+        });
+
+        let url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/japanese-audio.wav";
+        let audioData = await loadAudio(url);
+
+        {
+          // Transcribe Japanese w/ word-level timestamps.
+          let output = await transcriber(audioData, { return_timestamps: "word", language: "japanese", task: "transcribe" });
+          const target = {
+            text: "モリナガの美味しい牛乳は濃い青色に牛乳瓶を払ったゼザインのパック牛乳である。",
+            chunks: [
+              { text: "モ", timestamp: [0, 0.56] },
+              { text: "リ", timestamp: [0.56, 0.64] },
+              { text: "ナ", timestamp: [0.64, 0.8] },
+              { text: "ガ", timestamp: [0.8, 0.88] },
+              { text: "の", timestamp: [0.88, 1.04] },
+              { text: "美味", timestamp: [1.04, 1.22] },
+              { text: "しい", timestamp: [1.22, 1.46] },
+              { text: "牛", timestamp: [1.46, 1.76] },
+              { text: "乳", timestamp: [1.76, 1.94] },
+              { text: "は", timestamp: [1.94, 2.14] },
+              { text: "濃", timestamp: [2.14, 2.34] },
+              { text: "い", timestamp: [2.34, 2.48] },
+              { text: "青", timestamp: [2.48, 2.62] },
+              { text: "色", timestamp: [2.62, 2.84] },
+              { text: "に", timestamp: [2.84, 3] },
+              { text: "牛", timestamp: [3, 3.22] },
+              { text: "乳", timestamp: [3.22, 3.42] },
+              { text: "瓶", timestamp: [3.42, 3.58] },
+              { text: "を", timestamp: [3.58, 3.82] },
+              { text: "払", timestamp: [3.82, 4] },
+              { text: "った", timestamp: [4, 4.32] },
+              { text: "ゼ", timestamp: [4.32, 4.56] },
+              { text: "ザ", timestamp: [4.56, 4.6] },
+              { text: "イ", timestamp: [4.6, 4.74] },
+              { text: "ン", timestamp: [4.74, 4.8] },
+              { text: "の", timestamp: [4.8, 4.94] },
+              { text: "パ", timestamp: [4.94, 5.12] },
+              { text: "ック", timestamp: [5.12, 5.26] },
+              { text: "牛", timestamp: [5.26, 5.52] },
+              { text: "乳", timestamp: [5.52, 5.72] },
+              { text: "で", timestamp: [5.72, 5.86] },
+              { text: "ある。", timestamp: [5.86, 6.62] },
+            ],
+          };
+
+          compare(output, target);
+        }
+
+        await transcriber.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      models[4],
+      async () => {
+        let transcriber = await pipeline("automatic-speech-recognition", m(models[4]));
+
+        let url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav";
+        let audioData = await loadAudio(url);
+
+        {
+          // Transcribe
+          let output = await transcriber(audioData);
+          expect(output.text.length).toBeGreaterThan(50);
+          // { text: "and so my fellow america ask not what your country can do for you ask what you can do for your country" }
+        }
+
+        await transcriber.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Text-to-speech generation", () => {
+    // List all models which will be tested
+    const models = ["Xenova/speecht5_tts", "Xenova/mms-tts-fra"];
+
+    it(
+      models[0],
+      async () => {
+        let synthesizer = await pipeline("text-to-speech", models[0], {
+          // NOTE: Although the quantized version produces incoherent results,
+          // it it is okay to use for testing.
+          // quantized: false,
+        });
+
+        let speaker_embeddings = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin";
+
+        {
+          // Generate English speech
+          let output = await synthesizer("Hello, my dog is cute", { speaker_embeddings });
+          expect(output.audio.length).toBeGreaterThan(0);
+          expect(output.sampling_rate).toEqual(16000);
+        }
+
+        await synthesizer.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      models[1],
+      async () => {
+        let synthesizer = await pipeline("text-to-speech", models[1]);
+
+        {
+          // Generate French speech
+          let output = await synthesizer("Bonjour");
+          expect(output.audio.length).toBeGreaterThan(0);
+          expect(output.sampling_rate).toEqual(16000);
+        }
+
+        await synthesizer.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Audio classification", () => {
+    // List all models which will be tested
+    const models = ["Xenova/wav2vec2-large-xlsr-53-gender-recognition-librispeech"];
+
+    it(
+      models[0],
+      async () => {
+        let classifier = await pipeline("audio-classification", models[0]);
+
+        let url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav";
+        let audioData = await loadAudio(url);
+
+        {
+          // Classify audio
+          let outputs = await classifier(audioData);
+
+          let expected = [
+            { score: 0.997512936592102, label: "male" },
+            { score: 0.0024870133493095636, label: "female" },
+          ];
+          compare(outputs, expected);
+        }
+
+        await classifier.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Image-to-text", () => {
+    // List all models which will be tested
+    const models = ["Xenova/vit-gpt2-image-captioning"];
+
+    it(
+      models[0],
+      async () => {
+        let captioner = await pipeline("image-to-text", models[0]);
+
+        let url = "https://huggingface.co/datasets/mishig/sample_images/resolve/main/savanna.jpg";
+        let urls = ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg", "https://huggingface.co/datasets/mishig/sample_images/resolve/main/airport.jpg"];
+
+        // single
+        {
+          let output = await captioner(url, {
+            top_k: 0,
+            do_sample: false,
+          });
+          // let expected = [
+          //     { "generated_text": "a herd of giraffes and zebras grazing in a field" }
+          // ]
+
+          expect(output).toHaveLength(1);
+          expect(output[0].generated_text.length).toBeGreaterThan(10);
+        }
+
+        // single + generation options
+        {
+          let output = await captioner(url, {
+            max_new_tokens: 20,
+            num_beams: 2,
+            num_return_sequences: 2,
+            top_k: 0,
+            do_sample: false,
+          });
+          // let expected = [
+          //     { "generated_text": "a herd of giraffes and zebras grazing in a field" },
+          //     { "generated_text": "a herd of giraffes and zebras in a grassy field" }
+          // ]
+
+          expect(output).toHaveLength(2);
+          expect(output[0].generated_text.length).toBeGreaterThan(10);
+          expect(output[1].generated_text.length).toBeGreaterThan(10);
+        }
+
+        // batched
+        {
+          let output = await captioner(urls, {
+            top_k: 0,
+            do_sample: false,
+          });
+          // let expected = [
+          //     [{ "generated_text": "two men are kicking a soccer ball in a soccer game" }],
+          //     [{ "generated_text": "a plane on the tarmac with a passenger bus" }]
+          // ]
+
+          expect(output).toHaveLength(2);
+          expect(output[0]).toHaveLength(1);
+          expect(output[0][0].generated_text.length).toBeGreaterThan(10);
+          expect(output[1]).toHaveLength(1);
+          expect(output[1][0].generated_text.length).toBeGreaterThan(10);
+        }
+
+        // batched + generation options
+        {
+          let output = await captioner(urls, {
+            max_new_tokens: 20,
+            num_beams: 2,
+            num_return_sequences: 2,
+            top_k: 0,
+            do_sample: false,
+          });
+          // let expected = [
+          //     [
+          //         { "generated_text": "two men are kicking a soccer ball on a field" },
+          //         { "generated_text": "two men are kicking a soccer ball in a soccer game" }
+          //     ], [
+          //         { "generated_text": "a plane on a tarmac with a group of buses" },
+          //         { "generated_text": "a plane on a tarmac with a group of people on the ground" }
+          //     ]
+          // ];
+
+          expect(output).toHaveLength(2);
+          expect(output[0]).toHaveLength(2);
+          expect(output[0][0].generated_text.length).toBeGreaterThan(10);
+          expect(output[0][1].generated_text.length).toBeGreaterThan(10);
+          expect(output[1]).toHaveLength(2);
+          expect(output[1][0].generated_text.length).toBeGreaterThan(10);
+          expect(output[1][1].generated_text.length).toBeGreaterThan(10);
+        }
+        await captioner.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Image classification", () => {
+    // List all models which will be tested
+    const models = ["Xenova/vit-base-patch16-224"];
+
+    it(
+      models[0],
+      async () => {
+        let classifier = await pipeline("image-classification", models[0]);
+
+        let url = "https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg";
+        let urls = ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg", "https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg"];
+
+        // single
+        {
+          let outputs = await classifier(url);
+
+          let expected = [{ label: "tiger, Panthera tigris", score: 0.607988178730011 }];
+
+          compare(outputs, expected, 0.2);
+        }
+
+        // single + topk
+        {
+          let outputs = await classifier(url, {
+            topk: 2,
+          });
+
+          let expected = [
+            { label: "tiger, Panthera tigris", score: 0.607988178730011 },
+            { label: "tiger cat", score: 0.3877776563167572 },
+          ];
+
+          compare(outputs, expected, 0.2);
+        }
+
+        // batched
+        {
+          let outputs = await classifier(urls);
+
+          let expected = [
+            { label: "palace", score: 0.9986862540245056 },
+            { label: "teapot", score: 0.987880527973175 },
+          ];
+
+          compare(outputs, expected);
+        }
+
+        // batched + topk
+        {
+          let outputs = await classifier(urls, {
+            topk: 2,
+          });
+
+          let expected = [
+            [
+              { label: "palace", score: 0.9986862540245056 },
+              { label: "castle", score: 0.00037879671435803175 },
+            ],
+            [
+              { label: "teapot", score: 0.987880527973175 },
+              { label: "coffeepot", score: 0.006591461598873138 },
+            ],
+          ];
+
+          compare(outputs, expected);
+        }
+
+        await classifier.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Image segmentation", () => {
+    // List all models which will be tested
+    const models = ["Xenova/detr-resnet-50-panoptic", "Xenova/segformer_b2_clothes"];
+
+    it(
+      models[0],
+      async () => {
+        let segmenter = await pipeline("image-segmentation", models[0], {
+          // Quantized version of model produces incorrect results
+          quantized: false,
+        });
+        let img = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png";
+
+        // single
+        {
+          let outputs = await segmenter(img);
+
+          let expected = [
+            { score: 0.9916538596153259, label: "cat", mask: 58998 },
+            { score: 0.9987397789955139, label: "remote", mask: 4164 },
+            { score: 0.9994599223136902, label: "remote", mask: 2275 },
+            { score: 0.9730215072631836, label: "couch", mask: 176980 },
+            { score: 0.9993911385536194, label: "cat", mask: 52670 },
+          ];
+
+          let outputLabels = outputs.map((x) => x.label);
+          let expectedLabels = expected.map((x) => x.label);
+
+          expect(outputLabels).toHaveLength(expectedLabels.length);
+          expect(outputLabels.sort()).toEqual(expectedLabels.sort());
+        }
+
+        await segmenter.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      models[1],
+      async () => {
+        let segmenter = await pipeline("image-segmentation", models[1]);
+        let img = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/young-man-standing-and-leaning-on-car.jpg";
+
+        // single
+        {
+          let outputs = await segmenter(img);
+
+          let expected = [{ label: "Background" }, { label: "Hair" }, { label: "Upper-clothes" }, { label: "Pants" }, { label: "Left-shoe" }, { label: "Right-shoe" }, { label: "Face" }, { label: "Left-leg" }, { label: "Right-leg" }, { label: "Left-arm" }, { label: "Right-arm" }];
+
+          let outputLabels = outputs.map((x) => x.label);
+          let expectedLabels = expected.map((x) => x.label);
+
+          expect(outputLabels).toHaveLength(expectedLabels.length);
+          expect(outputLabels.sort()).toEqual(expectedLabels.sort());
+
+          // check that all scores are null, and masks have correct dimensions
+          for (let output of outputs) {
+            expect(output.score).toBeNull();
+            expect(output.mask.width).toEqual(970);
+            expect(output.mask.height).toEqual(1455);
+            expect(output.mask.channels).toEqual(1);
+          }
+        }
+
+        await segmenter.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Zero-shot image classification", () => {
+    // List all models which will be tested
+    const models = ["Xenova/clip-vit-base-patch32"];
+
+    it(
+      models[0],
+      async () => {
+        let classifier = await pipeline("zero-shot-image-classification", models[0]);
+
+        let url = "https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg";
+        let urls = ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg", "https://huggingface.co/datasets/mishig/sample_images/resolve/main/airport.jpg", "https://huggingface.co/datasets/mishig/sample_images/resolve/main/savanna.jpg"];
+
+        let classes = ["football", "airport", "animals"];
+
+        // single
+        {
+          let output = await classifier(url, classes);
+
+          let expected = [
+            { score: 0.9719080924987793, label: "football" },
+            { score: 0.022564826533198357, label: "animals" },
+            { score: 0.005527070723474026, label: "airport" },
+          ];
+          compare(output, expected, 0.1);
+        }
+
+        // batched
+        {
+          let output = await classifier(urls, classes);
+
+          let expected = [
+            [
+              { score: 0.9712504148483276, label: "football" },
+              { score: 0.022469401359558105, label: "animals" },
+              { score: 0.006280169822275639, label: "airport" },
+            ],
+            [
+              { score: 0.997433602809906, label: "airport" },
+              { score: 0.0016500800848007202, label: "animals" },
+              { score: 0.0009163151844404638, label: "football" },
+            ],
+            [
+              { score: 0.9851226806640625, label: "animals" },
+              { score: 0.007516484707593918, label: "football" },
+              { score: 0.007360846735537052, label: "airport" },
+            ],
+          ];
+          compare(output, expected, 0.1);
+        }
+        await classifier.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Object detection", () => {
+    // List all models which will be tested
+    const models = ["Xenova/detr-resnet-50"];
+
+    it(
+      models[0],
+      async () => {
+        let detector = await pipeline("object-detection", models[0]);
+
+        // TODO add batched test cases when supported
+        let url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg";
+        let urls = ["https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/savanna.jpg"];
+
+        // single + threshold
+        {
+          let output = await detector(url, {
+            threshold: 0.9,
+          });
+
+          // let expected = [
+          //     {
+          //         "score": 0.9977124929428101,
+          //         "label": "remote",
+          //         "box": { "xmin": 41, "ymin": 70, "xmax": 176, "ymax": 118 }
+          //     },
+          //     {
+          //         "score": 0.9984639883041382,
+          //         "label": "remote",
+          //         "box": { "xmin": 332, "ymin": 73, "xmax": 369, "ymax": 188 }
+          //     },
+          //     {
+          //         "score": 0.9964856505393982,
+          //         "label": "couch",
+          //         "box": { "xmin": 0, "ymin": 1, "xmax": 639, "ymax": 474 }
+          //     },
+          //     {
+          //         "score": 0.9988334774971008,
+          //         "label": "cat",
+          //         "box": { "xmin": 11, "ymin": 51, "xmax": 314, "ymax": 472 }
+          //     },
+          //     {
+          //         "score": 0.9982513785362244,
+          //         "label": "cat",
+          //         "box": { "xmin": 345, "ymin": 22, "xmax": 640, "ymax": 371 }
+          //     }
+          // ]
+
+          expect(output.length).toBeGreaterThan(0);
+          for (let cls of output) {
+            expect(typeof cls.score).toBe("number");
+            expect(typeof cls.label).toBe("string");
+            for (let key of ["xmin", "ymin", "xmax", "ymax"]) {
+              expect(typeof cls.box[key]).toBe("number");
             }
-            await generator.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Text generation', () => {
-
-        // List all models which will be tested
-        const models = [
-            'distilgpt2',
-
-            'Salesforce/codegen-350M-mono',
-        ];
-
-        it(models[0], async () => {
-            let generator = await pipeline('text-generation', m(models[0]));
-            let texts = [
-                'Once upon a time, there was a',
-                'I enjoy walking with my cute dog',
-            ];
-
-            // single
-            {
-                let output = await generator(texts[0], {
-                    max_new_tokens: 10,
-                    top_k: 0,
-                    do_sample: false
-                })
-                expect(output).toHaveLength(1);
-                expect(output[0].generated_text.length).toBeGreaterThan(texts[0].length);
-            }
-
-            // single + `num_beams` + `num_return_sequences`
-            {
-                let output = await generator(texts[0], {
-                    max_new_tokens: 10,
-                    num_beams: 2,
-                    num_return_sequences: 2,
-                    top_k: 0,
-                    do_sample: false
-                })
-                expect(output).toHaveLength(2);
-                expect(output[0].generated_text.length).toBeGreaterThan(texts[0].length);
-                expect(output[1].generated_text.length).toBeGreaterThan(texts[0].length);
-
-            }
-
-            // batched + `num_beams` + `num_return_sequences`
-            {
-                let output = await generator(texts, {
-                    max_new_tokens: 10,
-                    num_beams: 2,
-                    num_return_sequences: 2,
-                    top_k: 0,
-                    do_sample: false
-                });
-                expect(output).toHaveLength(2);
-                expect(output[0]).toHaveLength(2);
-                expect(output[0][0].generated_text.length).toBeGreaterThan(texts[0].length);
-                expect(output[0][1].generated_text.length).toBeGreaterThan(texts[0].length);
-                expect(output[1]).toHaveLength(2);
-                expect(output[1][0].generated_text.length).toBeGreaterThan(texts[1].length);
-                expect(output[1][1].generated_text.length).toBeGreaterThan(texts[1].length);
-
-            }
-
-            await generator.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-
-        it(models[1], async () => {
-            let generator = await pipeline('text-generation', m(models[1]));
-            let code = 'def fib(n):';
-
-            // single + `added_tokens`
-            {
-                let output = await generator(code, {
-                    max_new_tokens: 45,
-                    top_k: 0,
-                    do_sample: false
-                })
-                expect(output).toHaveLength(1);
-                expect(output[0].generated_text.length).toBeGreaterThan(code.length);
-            }
-            await generator.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Feature extraction', () => {
-
-        // List all models which will be tested
-        const models = [
-            'sentence-transformers/all-MiniLM-L6-v2',
-        ];
-
-        it(models[0], async () => {
-            let extractor = await pipeline('feature-extraction', m(models[0]));
-
-            // Provide sentences
-            let sentences = [
-                'This framework generates embeddings for each input sentence',
-                'Sentences are passed as a list of string.',
-                'The quick brown fox jumps over the lazy dog.'
-            ]
-
-            // Without pooling or normalization
-            {
-
-                let output = await extractor(sentences);
-                expect(output.dims).toHaveLength(3);
+          }
+        }
+
+        // batched + threshold + percentage
+        {
+          let output = await detector(urls, {
+            threshold: 0.9,
+            percentage: true,
+          });
+          // let expected = [[
+          //     {
+          //         score: 0.9991137385368347,
+          //         label: 'zebra',
+          //         box: { xmin: 0.65165576338768, ymin: 0.685152679681778, xmax: 0.723189502954483, ymax: 0.8801506459712982 }
+          //     },
+          //     {
+          //         score: 0.998811662197113,
+          //         label: 'zebra',
+          //         box: { xmin: 0.20797613263130188, ymin: 0.6543092578649521, xmax: 0.4147692620754242, ymax: 0.9040975719690323 }
+          //     },
+          //     {
+          //         score: 0.9707837104797363,
+          //         label: 'giraffe',
+          //         box: { xmin: 0.02498096227645874, ymin: 0.40549489855766296, xmax: 0.38669759035110474, ymax: 0.7895723879337311 }
+          //     },
+          //     {
+          //         score: 0.9984336495399475,
+          //         label: 'zebra',
+          //         box: { xmin: 0.3540637195110321, ymin: 0.6370827257633209, xmax: 0.5765090882778168, ymax: 0.8480959832668304 }
+          //     },
+          //     {
+          //         score: 0.9986463785171509,
+          //         label: 'giraffe',
+          //         box: { xmin: 0.6763969212770462, ymin: 0.25748637318611145, xmax: 0.974339172244072, ymax: 0.8684568107128143 }
+          //     }
+          // ]]
+
+          expect(output).toHaveLength(urls.length); // Same number of inputs as outputs
+
+          for (let i = 0; i < output.length; ++i) {
+            expect(output[i].length).toBeGreaterThan(0);
+            for (let cls of output[i]) {
+              expect(typeof cls.score).toBe("number");
+              expect(typeof cls.label).toBe("string");
+              for (let key of ["xmin", "ymin", "xmax", "ymax"]) {
+                expect(typeof cls.box[key]).toBe("number");
+              }
             }
-
-            // With pooling and normalization + compare features
-            {
-                let output = await extractor(sentences, { pooling: 'mean', normalize: true });
-                expect(output.dims).toHaveLength(2);
-
-                // Convert Tensor to JS list
-                output = output.tolist();
-
-                let pairwiseScores = [[output[0], output[1]], [output[0], output[2]], [output[1], output[2]]].map(x => cos_sim(...x))
-
-                let expected = [0.502872309810269, 0.11088411026413121, 0.09602621986931259]
-                compare(pairwiseScores, expected);
+          }
+        }
+
+        await detector.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Zero-shot object detection", () => {
+    // List all models which will be tested
+    const models = ["Xenova/owlvit-base-patch32"];
+
+    it(
+      models[0],
+      async () => {
+        let detector = await pipeline("zero-shot-object-detection", models[0]);
+
+        // single (default)
+        {
+          let url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/astronaut.png";
+          let candidate_labels = ["human face", "rocket", "helmet", "american flag"];
+
+          let output = await detector(url, candidate_labels);
+
+          // let expected = [
+          //     {
+          //         score: 0.24392342567443848,
+          //         label: 'human face',
+          //         box: { xmin: 180, ymin: 67, xmax: 274, ymax: 175 }
+          //     },
+          //     {
+          //         score: 0.15129457414150238,
+          //         label: 'american flag',
+          //         box: { xmin: 0, ymin: 4, xmax: 106, ymax: 513 }
+          //     },
+          //     {
+          //         score: 0.13649864494800568,
+          //         label: 'helmet',
+          //         box: { xmin: 277, ymin: 337, xmax: 511, ymax: 511 }
+          //     },
+          //     {
+          //         score: 0.10262022167444229,
+          //         label: 'rocket',
+          //         box: { xmin: 352, ymin: -1, xmax: 463, ymax: 287 }
+          //     }
+          // ]
+
+          expect(output.length).toBeGreaterThan(0);
+          for (let cls of output) {
+            expect(typeof cls.score).toBe("number");
+            expect(typeof cls.label).toBe("string");
+            for (let key of ["xmin", "ymin", "xmax", "ymax"]) {
+              expect(typeof cls.box[key]).toBe("number");
             }
-            await extractor.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Speech-to-text generation', () => {
-
-        // List all models which will be tested
-        const models = [
-            // whisper
-            'openai/whisper-tiny.en', // English-only
-            'openai/whisper-small', // Multilingual
-            ['openai/whisper-tiny.en', 'output_attentions'], // English-only + `output_attentions`
-            ['openai/whisper-small', 'output_attentions'], // Multilingual + `output_attentions`
-
-            // wav2vec2
-            'jonatasgrosman/wav2vec2-large-xlsr-53-english',
-        ];
-
-        it(models[0], async () => {
-            let transcriber = await pipeline('automatic-speech-recognition', m(models[0]));
-
-            let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
-            let audioData = await loadAudio(url);
-
-            { // Transcribe English
-                let output = await transcriber(audioData);
-                expect(output.text.length).toBeGreaterThan(50);
-                // { text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country." }
-            }
-
-            { // Transcribe English w/ timestamps.
-                let output = await transcriber(audioData, { return_timestamps: true });
-                expect(output.text.length).toBeGreaterThan(50);
-                expect(output.chunks.length).toBeGreaterThan(0);
-                // {
-                //   text: " And so my fellow Americans ask not what your country can do for you, ask what you can do for your country."
-                //   chunks: [
-                //     { timestamp: [0, 8],  text: " And so my fellow Americans ask not what your country can do for you" }
-                //     { timestamp: [8, 11], text: " ask what you can do for your country." }
-                //   ]
-                // }
+          }
+        }
+
+        // topk + threshold + percentage
+        {
+          let url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/beach.png";
+          let candidate_labels = ["hat", "book", "sunglasses", "camera"];
+
+          let output = await detector(url, candidate_labels, {
+            topk: 4,
+            threshold: 0.05,
+            percentage: true,
+          });
+
+          // let expected = [
+          //     {
+          //         score: 0.1606510728597641,
+          //         label: 'sunglasses',
+          //         box: { xmin: 347, ymin: 229, xmax: 429, ymax: 264 }
+          //     },
+          //     {
+          //         score: 0.08935828506946564,
+          //         label: 'hat',
+          //         box: { xmin: 38, ymin: 174, xmax: 258, ymax: 364 }
+          //     },
+          //     {
+          //         score: 0.08530698716640472,
+          //         label: 'camera',
+          //         box: { xmin: 187, ymin: 350, xmax: 260, ymax: 411 }
+          //     },
+          //     {
+          //         score: 0.08349756896495819,
+          //         label: 'book',
+          //         box: { xmin: 261, ymin: 280, xmax: 494, ymax: 425 }
+          //     }
+          // ]
+
+          expect(output.length).toBeGreaterThan(0);
+          for (let cls of output) {
+            expect(typeof cls.score).toBe("number");
+            expect(typeof cls.label).toBe("string");
+            for (let key of ["xmin", "ymin", "xmax", "ymax"]) {
+              expect(typeof cls.box[key]).toBe("number");
             }
-            await transcriber.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it(models[1], async () => {
-            let transcriber = await pipeline('automatic-speech-recognition', m(models[1]));
-
-            let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/french-audio.wav';
-            let audioData = await loadAudio(url);
-
-            { // Transcribe French
-                let output = await transcriber(audioData, { language: 'french', task: 'transcribe' });
-                expect(output.text.length).toBeGreaterThan(20);
-                // { text: " J'adore, j'aime, je n'aime pas, je déteste." }
-            }
-
-            { // Translate French to English.
-                let output = await transcriber(audioData, { language: 'french', task: 'translate' });
-                expect(output.text.length).toBeGreaterThan(20);
-                // { text: " I love, I like, I don't like, I hate." }
-            }
-            await transcriber.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it(models[2].join(' + '), async () => {
-            let transcriber = await pipeline('automatic-speech-recognition', m(models[2][0]), {
-                revision: models[2][1],
-                quantized: false,
-            });
-
-
-            let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
-            let audioData = await loadAudio(url);
-
-            { // Transcribe English w/ word-level timestamps.
-                let output = await transcriber(audioData, { return_timestamps: 'word' });
-                const target = {
-                    "text": " And so my fellow Americans ask not what your country can do for you ask what you can do for your country.",
-                    "chunks": [
-                        { "text": " And", "timestamp": [0, 0.78] },
-                        { "text": " so", "timestamp": [0.78, 1.06] },
-                        { "text": " my", "timestamp": [1.06, 1.46] },
-                        { "text": " fellow", "timestamp": [1.46, 1.76] },
-                        { "text": " Americans", "timestamp": [1.76, 2.22] },
-                        { "text": " ask", "timestamp": [2.22, 3.88] },
-                        { "text": " not", "timestamp": [3.88, 4.52] },
-                        { "text": " what", "timestamp": [4.52, 5.68] },
-                        { "text": " your", "timestamp": [5.68, 6] },
-                        { "text": " country", "timestamp": [6, 6.36] },
-                        { "text": " can", "timestamp": [6.36, 6.76] },
-                        { "text": " do", "timestamp": [6.76, 7.02] },
-                        { "text": " for", "timestamp": [7.02, 7.24] },
-                        { "text": " you", "timestamp": [7.24, 8.02] },
-                        { "text": " ask", "timestamp": [8.28, 8.66] },
-                        { "text": " what", "timestamp": [8.66, 8.94] },
-                        { "text": " you", "timestamp": [8.94, 9.28] },
-                        { "text": " can", "timestamp": [9.28, 9.5] },
-                        { "text": " do", "timestamp": [9.5, 9.72] },
-                        { "text": " for", "timestamp": [9.72, 9.92] },
-                        { "text": " your", "timestamp": [9.92, 10.22] },
-                        { "text": " country.", "timestamp": [10.22, 13.36] }
-                    ]
-                }
-
-                compare(output, target);
-            }
-
-            await transcriber.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it(models[3].join(' + '), async () => {
-            let transcriber = await pipeline('automatic-speech-recognition', m(models[3][0]), {
-                revision: models[3][1],
-            });
-
-            let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/japanese-audio.wav';
-            let audioData = await loadAudio(url);
-
-            { // Transcribe Japanese w/ word-level timestamps.
-                let output = await transcriber(audioData, { return_timestamps: 'word', language: 'japanese', task: 'transcribe' });
-                const target = {
-                    "text": "モリナガの美味しい牛乳は濃い青色に牛乳瓶を払ったゼザインのパック牛乳である。",
-                    "chunks": [
-                        { "text": "モ", "timestamp": [0, 0.56] },
-                        { "text": "リ", "timestamp": [0.56, 0.64] },
-                        { "text": "ナ", "timestamp": [0.64, 0.8] },
-                        { "text": "ガ", "timestamp": [0.8, 0.88] },
-                        { "text": "の", "timestamp": [0.88, 1.04] },
-                        { "text": "美味", "timestamp": [1.04, 1.22] },
-                        { "text": "しい", "timestamp": [1.22, 1.46] },
-                        { "text": "牛", "timestamp": [1.46, 1.76] },
-                        { "text": "乳", "timestamp": [1.76, 1.94] },
-                        { "text": "は", "timestamp": [1.94, 2.14] },
-                        { "text": "濃", "timestamp": [2.14, 2.34] },
-                        { "text": "い", "timestamp": [2.34, 2.48] },
-                        { "text": "青", "timestamp": [2.48, 2.62] },
-                        { "text": "色", "timestamp": [2.62, 2.84] },
-                        { "text": "に", "timestamp": [2.84, 3] },
-                        { "text": "牛", "timestamp": [3, 3.22] },
-                        { "text": "乳", "timestamp": [3.22, 3.42] },
-                        { "text": "瓶", "timestamp": [3.42, 3.58] },
-                        { "text": "を", "timestamp": [3.58, 3.82] },
-                        { "text": "払", "timestamp": [3.82, 4] },
-                        { "text": "った", "timestamp": [4, 4.32] },
-                        { "text": "ゼ", "timestamp": [4.32, 4.56] },
-                        { "text": "ザ", "timestamp": [4.56, 4.6] },
-                        { "text": "イ", "timestamp": [4.6, 4.74] },
-                        { "text": "ン", "timestamp": [4.74, 4.8] },
-                        { "text": "の", "timestamp": [4.8, 4.94] },
-                        { "text": "パ", "timestamp": [4.94, 5.12] },
-                        { "text": "ック", "timestamp": [5.12, 5.26] },
-                        { "text": "牛", "timestamp": [5.26, 5.52] },
-                        { "text": "乳", "timestamp": [5.52, 5.72] },
-                        { "text": "で", "timestamp": [5.72, 5.86] },
-                        { "text": "ある。", "timestamp": [5.86, 6.62] }
-                    ]
-                }
-
-                compare(output, target);
-            }
-
-            await transcriber.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-
-        it(models[4], async () => {
-            let transcriber = await pipeline('automatic-speech-recognition', m(models[4]));
-
-            let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
-            let audioData = await loadAudio(url);
-
-            { // Transcribe
-                let output = await transcriber(audioData);
-                expect(output.text.length).toBeGreaterThan(50);
-                // { text: "and so my fellow america ask not what your country can do for you ask what you can do for your country" }
-            }
-
-            await transcriber.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Text-to-speech generation', () => {
-
-        // List all models which will be tested
-        const models = [
-            'microsoft/speecht5_tts',
-            'facebook/mms-tts-fra',
-        ];
-
-        it(models[0], async () => {
-            let synthesizer = await pipeline('text-to-speech', m(models[0]), {
-                // NOTE: Although the quantized version produces incoherent results,
-                // it it is okay to use for testing.
-                // quantized: false,
-            });
-
-            let speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
-
-            { // Generate English speech
-                let output = await synthesizer('Hello, my dog is cute', { speaker_embeddings });
-                expect(output.audio.length).toBeGreaterThan(0);
-                expect(output.sampling_rate).toEqual(16000);
-            }
-
-            await synthesizer.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it(models[1], async () => {
-            let synthesizer = await pipeline('text-to-speech', m(models[1]));
-
-            { // Generate French speech
-                let output = await synthesizer('Bonjour');
-                expect(output.audio.length).toBeGreaterThan(0);
-                expect(output.sampling_rate).toEqual(16000);
-            }
-
-            await synthesizer.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-    });
-
-    describe('Audio classification', () => {
-
-        // List all models which will be tested
-        const models = [
-            'alefiury/wav2vec2-large-xlsr-53-gender-recognition-librispeech',
-        ];
-
-        it(models[0], async () => {
-            let classifier = await pipeline('audio-classification', m(models[0]));
-
-            let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav';
-            let audioData = await loadAudio(url);
-
-            { // Classify audio
-                let outputs = await classifier(audioData);
-
-                let expected = [
-                    { 'score': 0.997512936592102, 'label': 'male' },
-                    { 'score': 0.0024870133493095636, 'label': 'female' }
-                ];
-                compare(outputs, expected);
-            }
-
-            await classifier.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-    });
-
-    describe('Image-to-text', () => {
-
-        // List all models which will be tested
-        const models = [
-            'nlpconnect/vit-gpt2-image-captioning',
-        ];
-
-        it(models[0], async () => {
-            let captioner = await pipeline('image-to-text', m(models[0]));
-
-            let url = 'https://huggingface.co/datasets/mishig/sample_images/resolve/main/savanna.jpg';
-            let urls = [
-                'https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg',
-                'https://huggingface.co/datasets/mishig/sample_images/resolve/main/airport.jpg'
-            ]
-
-            // single
-            {
-                let output = await captioner(url, {
-                    top_k: 0,
-                    do_sample: false
-                })
-                // let expected = [
-                //     { "generated_text": "a herd of giraffes and zebras grazing in a field" }
-                // ]
-
-                expect(output).toHaveLength(1);
-                expect(output[0].generated_text.length).toBeGreaterThan(10);
-            }
-
-            // single + generation options
-            {
-                let output = await captioner(url, {
-                    max_new_tokens: 20,
-                    num_beams: 2,
-                    num_return_sequences: 2,
-                    top_k: 0,
-                    do_sample: false
-                })
-                // let expected = [
-                //     { "generated_text": "a herd of giraffes and zebras grazing in a field" },
-                //     { "generated_text": "a herd of giraffes and zebras in a grassy field" }
-                // ]
-
-                expect(output).toHaveLength(2);
-                expect(output[0].generated_text.length).toBeGreaterThan(10);
-                expect(output[1].generated_text.length).toBeGreaterThan(10);
-
-            }
-
-            // batched
-            {
-                let output = await captioner(urls, {
-                    top_k: 0,
-                    do_sample: false
-                })
-                // let expected = [
-                //     [{ "generated_text": "two men are kicking a soccer ball in a soccer game" }],
-                //     [{ "generated_text": "a plane on the tarmac with a passenger bus" }]
-                // ]
-
-                expect(output).toHaveLength(2);
-                expect(output[0]).toHaveLength(1);
-                expect(output[0][0].generated_text.length).toBeGreaterThan(10);
-                expect(output[1]).toHaveLength(1);
-                expect(output[1][0].generated_text.length).toBeGreaterThan(10);
-            }
-
-            // batched + generation options
-            {
-                let output = await captioner(urls, {
-                    max_new_tokens: 20,
-                    num_beams: 2,
-                    num_return_sequences: 2,
-                    top_k: 0,
-                    do_sample: false
-                })
-                // let expected = [
-                //     [
-                //         { "generated_text": "two men are kicking a soccer ball on a field" },
-                //         { "generated_text": "two men are kicking a soccer ball in a soccer game" }
-                //     ], [
-                //         { "generated_text": "a plane on a tarmac with a group of buses" },
-                //         { "generated_text": "a plane on a tarmac with a group of people on the ground" }
-                //     ]
-                // ];
-
-                expect(output).toHaveLength(2);
-                expect(output[0]).toHaveLength(2);
-                expect(output[0][0].generated_text.length).toBeGreaterThan(10);
-                expect(output[0][1].generated_text.length).toBeGreaterThan(10);
-                expect(output[1]).toHaveLength(2);
-                expect(output[1][0].generated_text.length).toBeGreaterThan(10);
-                expect(output[1][1].generated_text.length).toBeGreaterThan(10);
-
-            }
-            await captioner.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Image classification', () => {
-
-        // List all models which will be tested
-        const models = [
-            'google/vit-base-patch16-224',
-        ];
-
-        it(models[0], async () => {
-            let classifier = await pipeline('image-classification', m(models[0]));
-
-            let url = 'https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg';
-            let urls = [
-                'https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg',
-                'https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg'
-            ]
-
-            // single
-            {
-                let outputs = await classifier(url);
-
-                let expected = [
-                    { "label": "tiger, Panthera tigris", "score": 0.607988178730011 }
-                ];
-
-                compare(outputs, expected, 0.2);
-
-            }
-
-            // single + topk
-            {
-                let outputs = await classifier(url, {
-                    topk: 2
-                });
-
-                let expected = [
-                    { "label": "tiger, Panthera tigris", "score": 0.607988178730011 },
-                    { "label": "tiger cat", "score": 0.3877776563167572 }
-                ];
-
-                compare(outputs, expected, 0.2);
-            }
-
-
-            // batched
-            {
-                let outputs = await classifier(urls);
-
-                let expected = [
-                    { "label": "palace", "score": 0.9986862540245056 },
-                    { "label": "teapot", "score": 0.987880527973175 }
-                ];
-
-                compare(outputs, expected);
-            }
-
-            // batched + topk
-            {
-                let outputs = await classifier(urls, {
-                    topk: 2
-                });
-
-                let expected = [
-                    [
-                        { "label": "palace", "score": 0.9986862540245056 },
-                        { "label": "castle", "score": 0.00037879671435803175 }
-                    ],
-                    [
-                        { "label": "teapot", "score": 0.987880527973175 },
-                        { "label": "coffeepot", "score": 0.006591461598873138 }
-                    ]
-                ];
-
-                compare(outputs, expected);
-            }
-
-            await classifier.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Image segmentation', () => {
-
-        // List all models which will be tested
-        const models = [
-            'facebook/detr-resnet-50-panoptic',
-            'mattmdjaga/segformer_b2_clothes',
-        ];
-
-        it(models[0], async () => {
-            let segmenter = await pipeline('image-segmentation', m(models[0]), {
-                // Quantized version of model produces incorrect results
-                quantized: false,
-            })
-            let img = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png';
-
-            // single
-            {
-                let outputs = await segmenter(img);
-
-                let expected = [
-                    { score: 0.9916538596153259, label: 'cat', mask: 58998 },
-                    { score: 0.9987397789955139, label: 'remote', mask: 4164 },
-                    { score: 0.9994599223136902, label: 'remote', mask: 2275 },
-                    { score: 0.9730215072631836, label: 'couch', mask: 176980 },
-                    { score: 0.9993911385536194, label: 'cat', mask: 52670 }
-                ];
-
-                let outputLabels = outputs.map(x => x.label);
-                let expectedLabels = expected.map(x => x.label);
-
-                expect(outputLabels).toHaveLength(expectedLabels.length);
-                expect(outputLabels.sort()).toEqual(expectedLabels.sort())
-            }
-
-            await segmenter.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it(models[1], async () => {
-            let segmenter = await pipeline('image-segmentation', m(models[1]));
-            let img = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/young-man-standing-and-leaning-on-car.jpg';
-
-            // single
-            {
-                let outputs = await segmenter(img);
-
-                let expected = [
-                    { label: 'Background' },
-                    { label: 'Hair' },
-                    { label: 'Upper-clothes' },
-                    { label: 'Pants' },
-                    { label: 'Left-shoe' },
-                    { label: 'Right-shoe' },
-                    { label: 'Face' },
-                    { label: 'Left-leg' },
-                    { label: 'Right-leg' },
-                    { label: 'Left-arm' },
-                    { label: 'Right-arm' },
-                ];
-
-                let outputLabels = outputs.map(x => x.label);
-                let expectedLabels = expected.map(x => x.label);
-
-                expect(outputLabels).toHaveLength(expectedLabels.length);
-                expect(outputLabels.sort()).toEqual(expectedLabels.sort())
-
-                // check that all scores are null, and masks have correct dimensions
-                for (let output of outputs) {
-                    expect(output.score).toBeNull();
-                    expect(output.mask.width).toEqual(970);
-                    expect(output.mask.height).toEqual(1455);
-                    expect(output.mask.channels).toEqual(1);
-                }
-            }
-
-            await segmenter.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Zero-shot image classification', () => {
-
-        // List all models which will be tested
-        const models = [
-            'openai/clip-vit-base-patch32',
-        ];
-
-        it(models[0], async () => {
-            let classifier = await pipeline('zero-shot-image-classification', m(models[0]));
-
-            let url = 'https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg';
-            let urls = [
-                'https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg',
-                'https://huggingface.co/datasets/mishig/sample_images/resolve/main/airport.jpg',
-                'https://huggingface.co/datasets/mishig/sample_images/resolve/main/savanna.jpg',
-            ]
-
-            let classes = ['football', 'airport', 'animals'];
-
-            // single
-            {
-                let output = await classifier(url, classes);
-
-                let expected = [
-                    { score: 0.9719080924987793, label: 'football' },
-                    { score: 0.022564826533198357, label: 'animals' },
-                    { score: 0.005527070723474026, label: 'airport' }
-                ]
-                compare(output, expected, 0.1);
-
-            }
-
-
-            // batched
-            {
-                let output = await classifier(urls, classes);
-
-                let expected = [
-                    [
-                        { score: 0.9712504148483276, label: 'football' },
-                        { score: 0.022469401359558105, label: 'animals' },
-                        { score: 0.006280169822275639, label: 'airport' }
-                    ], [
-                        { score: 0.997433602809906, label: 'airport' },
-                        { score: 0.0016500800848007202, label: 'animals' },
-                        { score: 0.0009163151844404638, label: 'football' }
-                    ], [
-                        { score: 0.9851226806640625, label: 'animals' },
-                        { score: 0.007516484707593918, label: 'football' },
-                        { score: 0.007360846735537052, label: 'airport' }
-                    ]
-                ];
-                compare(output, expected, 0.1);
-
-            }
-            await classifier.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Object detection', () => {
-
-        // List all models which will be tested
-        const models = [
-            'facebook/detr-resnet-50',
-        ];
-
-        it(models[0], async () => {
-            let detector = await pipeline('object-detection', m(models[0]));
-
-            // TODO add batched test cases when supported
-            let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg';
-            let urls = ['https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/savanna.jpg']
-
-            // single + threshold
-            {
-                let output = await detector(url, {
-                    threshold: 0.9,
-                });
-
-                // let expected = [
-                //     {
-                //         "score": 0.9977124929428101,
-                //         "label": "remote",
-                //         "box": { "xmin": 41, "ymin": 70, "xmax": 176, "ymax": 118 }
-                //     },
-                //     {
-                //         "score": 0.9984639883041382,
-                //         "label": "remote",
-                //         "box": { "xmin": 332, "ymin": 73, "xmax": 369, "ymax": 188 }
-                //     },
-                //     {
-                //         "score": 0.9964856505393982,
-                //         "label": "couch",
-                //         "box": { "xmin": 0, "ymin": 1, "xmax": 639, "ymax": 474 }
-                //     },
-                //     {
-                //         "score": 0.9988334774971008,
-                //         "label": "cat",
-                //         "box": { "xmin": 11, "ymin": 51, "xmax": 314, "ymax": 472 }
-                //     },
-                //     {
-                //         "score": 0.9982513785362244,
-                //         "label": "cat",
-                //         "box": { "xmin": 345, "ymin": 22, "xmax": 640, "ymax": 371 }
-                //     }
-                // ]
-
-                expect(output.length).toBeGreaterThan(0);
-                for (let cls of output) {
-                    expect(typeof cls.score).toBe('number');
-                    expect(typeof cls.label).toBe('string');
-                    for (let key of ['xmin', 'ymin', 'xmax', 'ymax']) {
-                        expect(typeof cls.box[key]).toBe('number');
-                    }
-                }
-            }
-
-            // batched + threshold + percentage
-            {
-                let output = await detector(urls, {
-                    threshold: 0.9,
-                    percentage: true
-                });
-                // let expected = [[
-                //     {
-                //         score: 0.9991137385368347,
-                //         label: 'zebra',
-                //         box: { xmin: 0.65165576338768, ymin: 0.685152679681778, xmax: 0.723189502954483, ymax: 0.8801506459712982 }
-                //     },
-                //     {
-                //         score: 0.998811662197113,
-                //         label: 'zebra',
-                //         box: { xmin: 0.20797613263130188, ymin: 0.6543092578649521, xmax: 0.4147692620754242, ymax: 0.9040975719690323 }
-                //     },
-                //     {
-                //         score: 0.9707837104797363,
-                //         label: 'giraffe',
-                //         box: { xmin: 0.02498096227645874, ymin: 0.40549489855766296, xmax: 0.38669759035110474, ymax: 0.7895723879337311 }
-                //     },
-                //     {
-                //         score: 0.9984336495399475,
-                //         label: 'zebra',
-                //         box: { xmin: 0.3540637195110321, ymin: 0.6370827257633209, xmax: 0.5765090882778168, ymax: 0.8480959832668304 }
-                //     },
-                //     {
-                //         score: 0.9986463785171509,
-                //         label: 'giraffe',
-                //         box: { xmin: 0.6763969212770462, ymin: 0.25748637318611145, xmax: 0.974339172244072, ymax: 0.8684568107128143 }
-                //     }
-                // ]]
-
-                expect(output).toHaveLength(urls.length); // Same number of inputs as outputs
-
-                for (let i = 0; i < output.length; ++i) {
-                    expect(output[i].length).toBeGreaterThan(0);
-                    for (let cls of output[i]) {
-                        expect(typeof cls.score).toBe('number');
-                        expect(typeof cls.label).toBe('string');
-                        for (let key of ['xmin', 'ymin', 'xmax', 'ymax']) {
-                            expect(typeof cls.box[key]).toBe('number');
-                        }
-                    }
-                }
-
-
-            }
-
-            await detector.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Zero-shot object detection', () => {
-
-        // List all models which will be tested
-        const models = [
-            'google/owlvit-base-patch32',
-        ];
-
-        it(models[0], async () => {
-            let detector = await pipeline('zero-shot-object-detection', m(models[0]));
-
-
-            // single (default)
-            {
-                let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/astronaut.png';
-                let candidate_labels = ['human face', 'rocket', 'helmet', 'american flag'];
-
-                let output = await detector(url, candidate_labels);
-
-                // let expected = [
-                //     {
-                //         score: 0.24392342567443848,
-                //         label: 'human face',
-                //         box: { xmin: 180, ymin: 67, xmax: 274, ymax: 175 }
-                //     },
-                //     {
-                //         score: 0.15129457414150238,
-                //         label: 'american flag',
-                //         box: { xmin: 0, ymin: 4, xmax: 106, ymax: 513 }
-                //     },
-                //     {
-                //         score: 0.13649864494800568,
-                //         label: 'helmet',
-                //         box: { xmin: 277, ymin: 337, xmax: 511, ymax: 511 }
-                //     },
-                //     {
-                //         score: 0.10262022167444229,
-                //         label: 'rocket',
-                //         box: { xmin: 352, ymin: -1, xmax: 463, ymax: 287 }
-                //     }
-                // ]
-
-                expect(output.length).toBeGreaterThan(0);
-                for (let cls of output) {
-                    expect(typeof cls.score).toBe('number');
-                    expect(typeof cls.label).toBe('string');
-                    for (let key of ['xmin', 'ymin', 'xmax', 'ymax']) {
-                        expect(typeof cls.box[key]).toBe('number');
-                    }
-                }
-            }
-
-            // topk + threshold + percentage
-            {
-                let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/beach.png';
-                let candidate_labels = ['hat', 'book', 'sunglasses', 'camera'];
-
-                let output = await detector(url, candidate_labels, {
-                    topk: 4,
-                    threshold: 0.05,
-                    percentage: true,
-                });
-
-                // let expected = [
-                //     {
-                //         score: 0.1606510728597641,
-                //         label: 'sunglasses',
-                //         box: { xmin: 347, ymin: 229, xmax: 429, ymax: 264 }
-                //     },
-                //     {
-                //         score: 0.08935828506946564,
-                //         label: 'hat',
-                //         box: { xmin: 38, ymin: 174, xmax: 258, ymax: 364 }
-                //     },
-                //     {
-                //         score: 0.08530698716640472,
-                //         label: 'camera',
-                //         box: { xmin: 187, ymin: 350, xmax: 260, ymax: 411 }
-                //     },
-                //     {
-                //         score: 0.08349756896495819,
-                //         label: 'book',
-                //         box: { xmin: 261, ymin: 280, xmax: 494, ymax: 425 }
-                //     }
-                // ]
-
-                expect(output.length).toBeGreaterThan(0);
-                for (let cls of output) {
-                    expect(typeof cls.score).toBe('number');
-                    expect(typeof cls.label).toBe('string');
-                    for (let key of ['xmin', 'ymin', 'xmax', 'ymax']) {
-                        expect(typeof cls.box[key]).toBe('number');
-                    }
-                }
-            }
-
-            await detector.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Image-to-image', () => {
-
-        // List all models which will be tested
-        const models = [
-            'caidas/swin2SR-classical-sr-x2-64',
-        ];
-
-        it(models[0], async () => {
-            let upscaler = await pipeline('image-to-image', m(models[0]));
-
-            // Input is 3x3 => padded to 8x8 => upscaled to 16x16
-            let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/pattern_3x3.png';
-
-            // single
-            {
-                let outputs = await upscaler(url);
-                expect(outputs.width).toEqual(16);
-                expect(outputs.height).toEqual(16);
-                expect(outputs.channels).toEqual(3);
-                expect(outputs.data).toHaveLength(768);
-            }
-
-            // batched
-            {
-                let outputs = await upscaler([url, url]);
-                expect(outputs).toHaveLength(2);
-                for (let output of outputs) {
-                    expect(output.width).toEqual(16);
-                    expect(output.height).toEqual(16);
-                    expect(output.channels).toEqual(3);
-                    expect(output.data).toHaveLength(768);
-                }
-            }
-
-            await upscaler.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-
-    describe('Depth estimation', () => {
-
-        // List all models which will be tested
-        const models = [
-            'Intel/dpt-hybrid-midas',
-        ];
-
-        it(models[0], async () => {
-            let depth_estimator = await pipeline('depth-estimation', m(models[0]));
-
-            let url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg';
-
-            // single
-            {
-                let { predicted_depth, depth } = await depth_estimator(url);
-                compare(predicted_depth.dims, [384, 384]);
-                expect(depth.width).toEqual(640);
-                expect(depth.height).toEqual(480);
-                expect(depth.channels).toEqual(1);
-                expect(depth.data).toHaveLength(307200);
-            }
-
-            // batched
-            {
-                let outputs = await depth_estimator([url, url]);
-                expect(outputs).toHaveLength(2);
-                for (let output of outputs) {
-                    let { predicted_depth, depth } = output;
-                    compare(predicted_depth.dims, [384, 384]);
-                    expect(depth.width).toEqual(640);
-                    expect(depth.height).toEqual(480);
-                    expect(depth.channels).toEqual(1);
-                    expect(depth.data).toHaveLength(307200);
-                }
-            }
-
-            await depth_estimator.dispose();
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Document question answering', () => {
-
-        // List all models which will be tested
-        const models = [
-            'naver-clova-ix/donut-base-finetuned-docvqa',
-        ];
-        const image = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/invoice.png';
-        const question = 'What is the invoice number?';
-
-        it(models[0], async () => {
-            let qa_pipeline = await pipeline('document-question-answering', m(models[0]));
-
-            // basic
-            {
-                let output = await qa_pipeline(image, question);
-                let expected = [{ answer: 'us-001' }];
-                compare(output, expected);
-            }
-
-            await qa_pipeline.dispose();
-
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
+          }
+        }
+
+        await detector.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Image-to-image", () => {
+    // List all models which will be tested
+    const models = ["Xenova/swin2SR-classical-sr-x2-64"];
+
+    it(
+      models[0],
+      async () => {
+        let upscaler = await pipeline("image-to-image", models[0]);
+
+        // Input is 3x3 => padded to 8x8 => upscaled to 16x16
+        let url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/pattern_3x3.png";
+
+        // single
+        {
+          let outputs = await upscaler(url);
+          expect(outputs.width).toEqual(16);
+          expect(outputs.height).toEqual(16);
+          expect(outputs.channels).toEqual(3);
+          expect(outputs.data).toHaveLength(768);
+        }
+
+        // batched
+        {
+          let outputs = await upscaler([url, url]);
+          expect(outputs).toHaveLength(2);
+          for (let output of outputs) {
+            expect(output.width).toEqual(16);
+            expect(output.height).toEqual(16);
+            expect(output.channels).toEqual(3);
+            expect(output.data).toHaveLength(768);
+          }
+        }
+
+        await upscaler.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Depth estimation", () => {
+    // List all models which will be tested
+    const models = ["Xenova/dpt-hybrid-midas"];
+
+    it(
+      models[0],
+      async () => {
+        let depth_estimator = await pipeline("depth-estimation", models[0]);
+
+        let url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg";
+
+        // single
+        {
+          let { predicted_depth, depth } = await depth_estimator(url);
+          compare(predicted_depth.dims, [384, 384]);
+          expect(depth.width).toEqual(640);
+          expect(depth.height).toEqual(480);
+          expect(depth.channels).toEqual(1);
+          expect(depth.data).toHaveLength(307200);
+        }
+
+        // batched
+        {
+          let outputs = await depth_estimator([url, url]);
+          expect(outputs).toHaveLength(2);
+          for (let output of outputs) {
+            let { predicted_depth, depth } = output;
+            compare(predicted_depth.dims, [384, 384]);
+            expect(depth.width).toEqual(640);
+            expect(depth.height).toEqual(480);
+            expect(depth.channels).toEqual(1);
+            expect(depth.data).toHaveLength(307200);
+          }
+        }
+
+        await depth_estimator.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Document question answering", () => {
+    // List all models which will be tested
+    const models = ["Xenova/donut-base-finetuned-docvqa"];
+    const image = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/invoice.png";
+    const question = "What is the invoice number?";
+
+    it(
+      models[0],
+      async () => {
+        let qa_pipeline = await pipeline("document-question-answering", models[0]);
+
+        // basic
+        {
+          let output = await qa_pipeline(image, question);
+          let expected = [{ answer: "us-001" }];
+          compare(output, expected);
+        }
+
+        await qa_pipeline.dispose();
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
 });
diff --git a/tests/processors.test.js b/tests/processors.test.js
index 6e072ca99..caf1ddf86 100644
--- a/tests/processors.test.js
+++ b/tests/processors.test.js
@@ -1,638 +1,1018 @@
-
-import { env, AutoProcessor, RawImage } from '../src/transformers.js';
-import { m, MAX_TEST_EXECUTION_TIME } from './init.js';
-import { compare } from './test_utils.js';
+import { env, AutoProcessor, RawImage } from "../src/transformers.js";
+import { init, MAX_TEST_EXECUTION_TIME } from "./init.js";
+import { compare } from "./test_utils.js";
 
 // Initialise the testing environment
+init();
 env.allowLocalModels = false;
 env.useFSCache = false;
 
-const sum = array => Number(array.reduce((a, b) => a + b, array instanceof BigInt64Array ? 0n : 0));
-const avg = array => sum(array) / array.length;
-
-describe('Processors', () => {
-
-    describe('Image processors', () => {
-
-        const IMAGE_CACHE = new Map();
-        const load_image = async (url) => {
-            const cached = IMAGE_CACHE.get(url);
-            if (cached) {
-                return cached;
-            }
-            const image = await RawImage.fromURL(url);
-            IMAGE_CACHE.set(url, image);
-            return image;
-        }
-
-        const MODELS = {
-            swin2sr: 'caidas/swin2SR-classical-sr-x2-64',
-            sam: 'facebook/sam-vit-base',
-            'donut-swin': 'naver-clova-ix/donut-base-finetuned-cord-v2',
-            resnet: 'microsoft/resnet-50',
-            vit: 'google/vit-base-patch16-224',
-            mobilevit: 'apple/mobilevit-small',
-            mobilevit_2: 'Xenova/quickdraw-mobilevit-small',
-            mobilevit_3: 'apple/mobilevitv2-1.0-imagenet1k-256',
-            deit: 'facebook/deit-tiny-distilled-patch16-224',
-            beit: 'microsoft/beit-base-patch16-224-pt22k-ft22k',
-            detr: 'facebook/detr-resnet-50',
-            yolos: 'hustvl/yolos-small-300',
-            dpt: 'Intel/dpt-hybrid-midas',
-            dpt_2: 'LiheYoung/depth-anything-small-hf',
-            glpn: 'vinvino02/glpn-kitti',
-            nougat: 'facebook/nougat-small',
-            owlvit: 'google/owlvit-base-patch32',
-            clip: 'openai/clip-vit-base-patch16',
-            vitmatte: 'hustvl/vitmatte-small-distinctions-646',
-            dinov2: 'facebook/dinov2-small-imagenet1k-1-layer',
-            efficientnet: 'google/efficientnet-b0',
-        }
-
-        const TEST_IMAGES = {
-            pattern_3x3: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/pattern_3x3.png',
-            pattern_3x5: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/pattern_3x5.png',
-            checkerboard_8x8: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/checkerboard_8x8.png',
-            checkerboard_64x32: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/checkerboard_64x32.png',
-            receipt: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/receipt.png',
-            tiger: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/tiger.jpg',
-            paper: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/nougat_paper.png',
-            cats: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg',
-
-            // grayscale image
-            skateboard: 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/ml-web-games/skateboard.png',
-
-            vitmatte_image: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/vitmatte_image.png',
-            vitmatte_trimap: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/vitmatte_trimap.png',
-        }
-
-        // Swin2SRImageProcessor
-        //  - tests when padding is a number (do_pad=true, pad_size=8)
-        it(MODELS.swin2sr, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.swin2sr))
-
-            { // Pad to multiple of 8 (3x3 -> 8x8)
-                const image = await load_image(TEST_IMAGES.pattern_3x3);
-                const { pixel_values } = await processor(image);
-
-                compare(pixel_values.dims, [1, 3, 8, 8]);
-                compare(avg(pixel_values.data), 0.5458333368102709);
-            }
-
-            { // Do not pad if already a multiple of 8 (8x8 -> 8x8)
-                const image = await load_image(TEST_IMAGES.checkerboard_8x8);
-                const { pixel_values } = await processor(image);
-                compare(pixel_values.dims, [1, 3, 8, 8]);
-                compare(avg(pixel_values.data), 0.5);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
-
-        // SamProcessor/SamImageProcessor
-        //  - tests normal padding (do_pad=true, pad_size={"height":1024,"width":1024})
-        //  - In addition to the image, pass in a list of points
-        it(MODELS.sam, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.sam))
-
-            { // without input points
-                const image = await load_image(TEST_IMAGES.pattern_3x3);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
-                compare(pixel_values.dims, [1, 3, 1024, 1024]);
-                compare(avg(pixel_values.data), -0.4505715670146813);
-
-                compare(original_sizes, [[3, 3]]);
-                compare(reshaped_input_sizes, [[1024, 1024]]);
-            }
-
-            { // with input points
-                const image = await load_image(TEST_IMAGES.pattern_3x3);
-                const { original_sizes, reshaped_input_sizes, input_points } = await processor(image, [[[1, 2]]]);
-
-                compare(original_sizes, [[3, 3]]);
-                compare(reshaped_input_sizes, [[1024, 1024]]);
-                compare(input_points.tolist(), [[[[341.3333, 682.6667]]]]);
-            }
-
-            { // multiple points with labels
-                const image = await load_image(TEST_IMAGES.pattern_3x3);
-                const { original_sizes, reshaped_input_sizes, input_points, input_labels } = await processor(image, [[[1, 2], [2, 1]]], [[1, 0]]);
-
-                compare(original_sizes, [[3, 3]]);
-                compare(reshaped_input_sizes, [[1024, 1024]]);
-                compare(input_points.tolist(), [[[[341.3333, 682.6667], [682.6667, 341.3333]]]]);
-                compare(input_labels.tolist(), [[[1n, 0n]]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
-
-        // DonutProcessor/DonutFeatureExtractor
-        //  - tests thumbnail resizing (do_thumbnail=true, size=[960, 1280])
-        //  - tests padding after normalization (image_mean=image_std=0.5)
-        it(MODELS['donut-swin'], async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS['donut-swin']))
-
-            {
-                const image = await load_image(TEST_IMAGES.receipt);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
-
-                compare(pixel_values.dims, [1, 3, 1280, 960]);
-                compare(avg(pixel_values.data), 0.1229388610053704);
-
-                compare(original_sizes, [[864, 576]]);
-                compare(reshaped_input_sizes, [[1280, 853]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+const sum = (array) => Number(array.reduce((a, b) => a + b, array instanceof BigInt64Array ? 0n : 0));
+const avg = (array) => sum(array) / array.length;
+
+const IMAGE_CACHE = new Map();
+const load_image = async (url) => {
+  const cached = IMAGE_CACHE.get(url);
+  if (cached) {
+    return cached;
+  }
+  const image = await RawImage.fromURL(url);
+  IMAGE_CACHE.set(url, image);
+  return image;
+};
+
+const MODELS = {
+  swin2sr: "Xenova/swin2SR-classical-sr-x2-64",
+  sam: "Xenova/sam-vit-base",
+  "donut-swin": "Xenova/donut-base-finetuned-cord-v2",
+  resnet: "Xenova/resnet-50",
+  vit: "Xenova/vit-base-patch16-224",
+  mobilevit: "Xenova/mobilevit-small",
+  mobilevit_2: "Xenova/quickdraw-mobilevit-small",
+  mobilevit_3: "Xenova/mobilevitv2-1.0-imagenet1k-256",
+  deit: "Xenova/deit-tiny-distilled-patch16-224",
+  beit: "Xenova/beit-base-patch16-224-pt22k-ft22k",
+  detr: "Xenova/detr-resnet-50",
+  yolos: "Xenova/yolos-small-300",
+  dpt: "Xenova/dpt-hybrid-midas",
+  dpt_2: "Xenova/depth-anything-small-hf",
+  glpn: "Xenova/glpn-kitti",
+  nougat: "Xenova/nougat-small",
+  owlvit: "Xenova/owlvit-base-patch32",
+  clip: "Xenova/clip-vit-base-patch16",
+  vitmatte: "Xenova/vitmatte-small-distinctions-646",
+  dinov2: "Xenova/dinov2-small-imagenet1k-1-layer",
+  // efficientnet: 'Xenova/efficientnet-b0',
+  florence2: "Xenova/tiny-random-Florence2ForConditionalGeneration",
+};
+
+const TEST_IMAGES = {
+  pattern_3x3: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/pattern_3x3.png",
+  pattern_3x5: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/pattern_3x5.png",
+  checkerboard_8x8: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/checkerboard_8x8.png",
+  checkerboard_64x32: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/checkerboard_64x32.png",
+  receipt: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/receipt.png",
+  tiger: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/tiger.jpg",
+  paper: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/nougat_paper.png",
+  cats: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/cats.jpg",
+
+  // grayscale image
+  skateboard: "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/ml-web-games/skateboard.png",
+
+  vitmatte_image: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/vitmatte_image.png",
+  vitmatte_trimap: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/vitmatte_trimap.png",
+
+  beetle: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/beetle.png",
+  book_cover: "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/book-cover.png",
+};
+
+describe("Processors", () => {
+  describe("Image processors", () => {
+    // Swin2SRImageProcessor
+    //  - tests when padding is a number (do_pad=true, pad_size=8)
+    it(
+      MODELS.swin2sr,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.swin2sr);
+
+        {
+          // Pad to multiple of 8 (3x3 -> 8x8)
+          const image = await load_image(TEST_IMAGES.pattern_3x3);
+          const { pixel_values } = await processor(image);
+
+          compare(pixel_values.dims, [1, 3, 8, 8]);
+          compare(avg(pixel_values.data), 0.5458333368102709);
+        }
 
-        // ConvNextFeatureExtractor
-        it(MODELS.resnet, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.resnet))
+        {
+          // Do not pad if already a multiple of 8 (8x8 -> 8x8)
+          const image = await load_image(TEST_IMAGES.checkerboard_8x8);
+          const { pixel_values } = await processor(image);
+          compare(pixel_values.dims, [1, 3, 8, 8]);
+          compare(avg(pixel_values.data), 0.5);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    // SamProcessor/SamImageProcessor
+    //  - tests normal padding (do_pad=true, pad_size={"height":1024,"width":1024})
+    //  - In addition to the image, pass in a list of points
+    it(
+      MODELS.sam,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.sam);
+
+        {
+          // without input points
+          const image = await load_image(TEST_IMAGES.pattern_3x3);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+          compare(pixel_values.dims, [1, 3, 1024, 1024]);
+          compare(avg(pixel_values.data), -0.4505715670146813);
+
+          compare(original_sizes, [[3, 3]]);
+          compare(reshaped_input_sizes, [[1024, 1024]]);
+        }
 
-            {
-                const image = await load_image(TEST_IMAGES.tiger);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+        {
+          // with input points
+          const image = await load_image(TEST_IMAGES.pattern_3x3);
+          const { original_sizes, reshaped_input_sizes, input_points } = await processor(image, {
+            input_points: [[[1, 2]]],
+          });
 
-                compare(pixel_values.dims, [1, 3, 224, 224]);
-                compare(avg(pixel_values.data), 0.06262318789958954);
+          compare(original_sizes, [[3, 3]]);
+          compare(reshaped_input_sizes, [[1024, 1024]]);
+          compare(input_points.tolist(), [[[[341.3333, 682.6667]]]]);
+        }
 
-                compare(original_sizes, [[408, 612]]);
-                compare(reshaped_input_sizes, [[224, 224]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+        {
+          // multiple points with labels
+          const image = await load_image(TEST_IMAGES.pattern_3x3);
+          const { original_sizes, reshaped_input_sizes, input_points, input_labels } = await processor(image, {
+            input_points: [
+              [
+                [1, 2],
+                [2, 1],
+              ],
+            ],
+            input_labels: [[1, 0]],
+          });
+
+          compare(original_sizes, [[3, 3]]);
+          compare(reshaped_input_sizes, [[1024, 1024]]);
+          compare(input_points.tolist(), [
+            [
+              [
+                [341.3333, 682.6667],
+                [682.6667, 341.3333],
+              ],
+            ],
+          ]);
+          compare(input_labels.tolist(), [[[1n, 0n]]]);
+        }
 
-        // ViTFeatureExtractor
-        it(MODELS.vit, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.vit))
+        {
+          // with input boxes
+          const image = await load_image(TEST_IMAGES.pattern_3x3);
+          const { original_sizes, reshaped_input_sizes, input_boxes } = await processor(image, {
+            input_boxes: [[[0, 1, 2, 2]]],
+          });
 
-            {
-                const image = await load_image(TEST_IMAGES.tiger);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+          compare(original_sizes, [[3, 3]]);
+          compare(reshaped_input_sizes, [[1024, 1024]]);
+          compare(input_boxes.tolist(), [[[0, 341.3333, 682.6667, 682.6667]]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    // DonutProcessor/DonutFeatureExtractor
+    //  - tests thumbnail resizing (do_thumbnail=true, size=[960, 1280])
+    //  - tests padding after normalization (image_mean=image_std=0.5)
+    it(
+      MODELS["donut-swin"],
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS["donut-swin"]);
+
+        {
+          const image = await load_image(TEST_IMAGES.receipt);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+
+          compare(pixel_values.dims, [1, 3, 1280, 960]);
+          compare(avg(pixel_values.data), 0.1229388610053704);
+
+          compare(original_sizes, [[864, 576]]);
+          compare(reshaped_input_sizes, [[1280, 853]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
 
-                compare(pixel_values.dims, [1, 3, 224, 224]);
-                compare(avg(pixel_values.data), -0.22706867939852762);
+    // ConvNextFeatureExtractor
+    it(
+      MODELS.resnet,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.resnet);
 
-                compare(original_sizes, [[408, 612]]);
-                compare(reshaped_input_sizes, [[224, 224]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+        {
+          const image = await load_image(TEST_IMAGES.tiger);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
 
-        // MobileViTFeatureExtractor
-        it(MODELS.mobilevit, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.mobilevit))
+          compare(pixel_values.dims, [1, 3, 224, 224]);
+          compare(avg(pixel_values.data), 0.06262318789958954);
 
-            {
-                const image = await load_image(TEST_IMAGES.tiger);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+          compare(original_sizes, [[408, 612]]);
+          compare(reshaped_input_sizes, [[224, 224]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
 
-                compare(pixel_values.dims, [1, 3, 256, 256]);
-                compare(avg(pixel_values.data), 0.4599160496887033);
+    // ViTFeatureExtractor
+    it(
+      MODELS.vit,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.vit);
 
-                compare(original_sizes, [[408, 612]]);
-                compare(reshaped_input_sizes, [[256, 256]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+        {
+          const image = await load_image(TEST_IMAGES.tiger);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
 
-        // MobileViTFeatureExtractor
-        //  - tests not converting to rgb (do_convert_rgb=false)
-        it(MODELS.mobilevit_2, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.mobilevit_2))
+          compare(pixel_values.dims, [1, 3, 224, 224]);
+          compare(avg(pixel_values.data), -0.22706867939852762);
 
-            { // Tests grayscale image
-                const image = await load_image(TEST_IMAGES.skateboard);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+          compare(original_sizes, [[408, 612]]);
+          compare(reshaped_input_sizes, [[224, 224]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
 
-                compare(pixel_values.dims, [1, 1, 28, 28]);
-                compare(avg(pixel_values.data), 0.08558923671585128);
+    // MobileViTFeatureExtractor
+    it(
+      MODELS.mobilevit,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.mobilevit);
 
-                compare(original_sizes, [[28, 28]]);
-                compare(reshaped_input_sizes, [[28, 28]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+        {
+          const image = await load_image(TEST_IMAGES.tiger);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
 
-        // MobileViTImageProcessor
-        //  - tests converting RGB to BGR (do_flip_channel_order=true)
-        it(MODELS.mobilevit_3, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.mobilevit_3))
+          compare(pixel_values.dims, [1, 3, 256, 256]);
+          compare(avg(pixel_values.data), 0.4599160496887033);
 
-            {
-                const image = await load_image(TEST_IMAGES.cats);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+          compare(original_sizes, [[408, 612]]);
+          compare(reshaped_input_sizes, [[256, 256]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    // MobileViTFeatureExtractor
+    //  - tests not converting to rgb (do_convert_rgb=false)
+    it(
+      MODELS.mobilevit_2,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.mobilevit_2);
+
+        {
+          // Tests grayscale image
+          const image = await load_image(TEST_IMAGES.skateboard);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+
+          compare(pixel_values.dims, [1, 1, 28, 28]);
+          compare(avg(pixel_values.data), 0.08558923671585128);
+
+          compare(original_sizes, [[28, 28]]);
+          compare(reshaped_input_sizes, [[28, 28]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
 
-                compare(pixel_values.dims, [1, 3, 256, 256]);
-                compare(avg(pixel_values.data), 0.5215385556221008);
+    // MobileViTImageProcessor
+    //  - tests converting RGB to BGR (do_flip_channel_order=true)
+    it(
+      MODELS.mobilevit_3,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.mobilevit_3);
 
-                compare(original_sizes, [[480, 640]]);
-                compare(reshaped_input_sizes, [[256, 256]]);
+        {
+          const image = await load_image(TEST_IMAGES.cats);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
 
-                // Ensure RGB to BGR conversion
-                compare(pixel_values.data.slice(0, 3), [0.24313725531101227, 0.250980406999588, 0.364705890417099]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+          compare(pixel_values.dims, [1, 3, 256, 256]);
+          compare(avg(pixel_values.data), 0.5215385556221008);
 
-        // DeiTFeatureExtractor
-        it(MODELS.deit, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.deit))
+          compare(original_sizes, [[480, 640]]);
+          compare(reshaped_input_sizes, [[256, 256]]);
 
-            {
-                const image = await load_image(TEST_IMAGES.tiger);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
-
-                compare(pixel_values.dims, [1, 3, 224, 224]);
-                compare(avg(pixel_values.data), -0.2760336682859463);
+          // Ensure RGB to BGR conversion
+          compare(pixel_values.data.slice(0, 3), [0.24313725531101227, 0.250980406999588, 0.364705890417099]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
 
-                compare(original_sizes, [[408, 612]]);
-                compare(reshaped_input_sizes, [[224, 224]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+    // DeiTFeatureExtractor
+    it(
+      MODELS.deit,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.deit);
 
-        // BeitFeatureExtractor
-        it(MODELS.beit, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.beit))
+        {
+          const image = await load_image(TEST_IMAGES.tiger);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
 
-            {
-                const image = await load_image(TEST_IMAGES.tiger);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+          compare(pixel_values.dims, [1, 3, 224, 224]);
+          compare(avg(pixel_values.data), -0.2760336682859463);
 
-                compare(pixel_values.dims, [1, 3, 224, 224]);
-                compare(avg(pixel_values.data), -0.22706867939852762);
+          compare(original_sizes, [[408, 612]]);
+          compare(reshaped_input_sizes, [[224, 224]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
 
-                compare(original_sizes, [[408, 612]]);
-                compare(reshaped_input_sizes, [[224, 224]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+    // BeitFeatureExtractor
+    it(
+      MODELS.beit,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.beit);
 
+        {
+          const image = await load_image(TEST_IMAGES.tiger);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
 
-        // DetrFeatureExtractor
-        it(MODELS.detr, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.detr))
+          compare(pixel_values.dims, [1, 3, 224, 224]);
+          compare(avg(pixel_values.data), -0.22706867939852762);
 
-            {
-                const image = await load_image(TEST_IMAGES.tiger);
-                const { pixel_values, original_sizes, reshaped_input_sizes, pixel_mask } = await processor(image);
+          compare(original_sizes, [[408, 612]]);
+          compare(reshaped_input_sizes, [[224, 224]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
 
-                compare(pixel_values.dims, [1, 3, 888, 1333]);
-                compare(avg(pixel_values.data), -0.27840224131001773);
+    // DetrFeatureExtractor
+    it(
+      MODELS.detr,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.detr);
 
-                compare(original_sizes, [[408, 612]]);
-                compare(reshaped_input_sizes, [[888, 1333]]);
+        {
+          const image = await load_image(TEST_IMAGES.tiger);
+          const { pixel_values, original_sizes, reshaped_input_sizes, pixel_mask } = await processor(image);
 
-                compare(pixel_mask.dims, [1, 64, 64]);
-                compare(avg(pixel_mask.data), 1);
+          compare(pixel_values.dims, [1, 3, 888, 1333]);
+          compare(avg(pixel_values.data), -0.27840224131001773);
 
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+          compare(original_sizes, [[408, 612]]);
+          compare(reshaped_input_sizes, [[888, 1333]]);
 
+          compare(pixel_mask.dims, [1, 64, 64]);
+          compare(avg(pixel_mask.data), 1);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
 
-        // YolosFeatureExtractor
-        it(MODELS.yolos, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.yolos))
+    // YolosFeatureExtractor
+    it(
+      MODELS.yolos,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.yolos);
 
-            {
-                const image = await load_image(TEST_IMAGES.tiger);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+        {
+          const image = await load_image(TEST_IMAGES.tiger);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
 
-                compare(pixel_values.dims, [1, 3, 888, 1333]);
-                compare(avg(pixel_values.data), -0.27840224131001773);
+          compare(pixel_values.dims, [1, 3, 888, 1333]);
+          compare(avg(pixel_values.data), -0.27840224131001773);
 
-                compare(original_sizes, [[408, 612]]);
-                compare(reshaped_input_sizes, [[888, 1333]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+          compare(original_sizes, [[408, 612]]);
+          compare(reshaped_input_sizes, [[888, 1333]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    // DPTFeatureExtractor
+    it(
+      MODELS.dpt,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.dpt);
+
+        {
+          // Tests grayscale image
+          const image = await load_image(TEST_IMAGES.cats);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+
+          compare(pixel_values.dims, [1, 3, 384, 384]);
+          compare(avg(pixel_values.data), 0.0372855559389454);
+
+          compare(original_sizes, [[480, 640]]);
+          compare(reshaped_input_sizes, [[384, 384]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    // GLPNForDepthEstimation
+    //  - tests `size_divisor` and no size (size_divisor=32)
+    it(
+      MODELS.glpn,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.glpn);
+
+        {
+          const image = await load_image(TEST_IMAGES.cats);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+          compare(pixel_values.dims, [1, 3, 480, 640]);
+          compare(avg(pixel_values.data), 0.5186172404123327);
+
+          compare(original_sizes, [[480, 640]]);
+          compare(reshaped_input_sizes, [[480, 640]]);
+        }
 
-        // DPTFeatureExtractor
-        it(MODELS.dpt, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.dpt))
+        {
+          // Tests input which is not a multiple of 32 ([408, 612] -> [384, 608])
+          const image = await load_image(TEST_IMAGES.tiger);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
 
-            { // Tests grayscale image
-                const image = await load_image(TEST_IMAGES.cats);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+          compare(pixel_values.dims, [1, 3, 384, 608]);
+          compare(avg(pixel_values.data), 0.38628831535989555);
 
-                compare(pixel_values.dims, [1, 3, 384, 384]);
-                compare(avg(pixel_values.data), 0.0372855559389454);
+          compare(original_sizes, [[408, 612]]);
+          compare(reshaped_input_sizes, [[384, 608]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    // NougatImageProcessor
+    //  - tests padding after normalization (image_mean != 0.5, image_std != 0.5)
+    it(
+      MODELS.nougat,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.nougat);
+
+        {
+          const image = await load_image(TEST_IMAGES.paper);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+
+          compare(pixel_values.dims, [1, 3, 896, 672]);
+          compare(avg(pixel_values.data), 1.8447155005897355);
+
+          compare(original_sizes, [[850, 685]]);
+          compare(reshaped_input_sizes, [[833, 672]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    // OwlViTFeatureExtractor
+    it(MODELS.owlvit, async () => {
+      const processor = await AutoProcessor.from_pretrained(MODELS.owlvit);
+      {
+        const image = await load_image(TEST_IMAGES.cats);
+        const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+
+        compare(pixel_values.dims, [1, 3, 768, 768]);
+        compare(avg(pixel_values.data), 0.250620447910435);
+
+        compare(original_sizes, [[480, 640]]);
+        compare(reshaped_input_sizes, [[768, 768]]);
+      }
+    });
 
-                compare(original_sizes, [[480, 640]]);
-                compare(reshaped_input_sizes, [[384, 384]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+    // CLIPFeatureExtractor
+    //  - tests center crop (do_center_crop=true, crop_size=224)
+    it(
+      MODELS.clip,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.clip);
 
-        // GLPNForDepthEstimation
-        //  - tests `size_divisor` and no size (size_divisor=32)
-        it(MODELS.glpn, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.glpn))
+        {
+          const image = await load_image(TEST_IMAGES.tiger);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
 
-            {
-                const image = await load_image(TEST_IMAGES.cats);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
-                compare(pixel_values.dims, [1, 3, 480, 640]);
-                compare(avg(pixel_values.data), 0.5186172404123327);
+          compare(pixel_values.dims, [1, 3, 224, 224]);
+          compare(avg(pixel_values.data), -0.06678297738282096);
 
-                compare(original_sizes, [[480, 640]]);
-                compare(reshaped_input_sizes, [[480, 640]]);
-            }
+          compare(original_sizes, [[408, 612]]);
+          compare(reshaped_input_sizes, [[224, 224]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    // VitMatteImageProcessor
+    //  - tests custom overrides
+    //  - tests multiple inputs
+    //  - tests `size_divisibility` and no size (size_divisibility=32)
+    //  - tests do_pad and `size_divisibility`
+    it(
+      MODELS.vitmatte,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.vitmatte);
+
+        {
+          const image = await load_image(TEST_IMAGES.vitmatte_image);
+          const image2 = await load_image(TEST_IMAGES.vitmatte_trimap);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image, image2);
+
+          compare(pixel_values.dims, [1, 4, 640, 960]);
+          expect(avg(pixel_values.data)).toBeCloseTo(-0.4028555154800415);
+          expect(pixel_values.data[0]).toBeCloseTo(-0.9921568632125854);
+          expect(pixel_values.data[1]).toBeCloseTo(-0.9921568632125854);
+          expect(pixel_values.data[5]).toBeCloseTo(-1.0);
+          expect(pixel_values.data[640]).toBeCloseTo(-0.6784313917160034);
+          expect(pixel_values.data[641]).toBeCloseTo(-0.6705882549285889);
+          expect(pixel_values.data[640 * 960]).toBeCloseTo(-1.0);
+          expect(pixel_values.data[640 * 960 + 1]).toBeCloseTo(-1.0);
+          expect(pixel_values.data.at(-1)).toBeCloseTo(0.0);
+
+          compare(original_sizes, [[640, 960]]);
+          compare(reshaped_input_sizes, [[640, 960]]);
+        }
 
-            { // Tests input which is not a multiple of 32 ([408, 612] -> [384, 608])
-                const image = await load_image(TEST_IMAGES.tiger);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+        {
+          const image = await load_image(TEST_IMAGES.pattern_3x5);
+          const image2 = await load_image(TEST_IMAGES.pattern_3x5);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image, image2);
+
+          compare(pixel_values.dims, [1, 4, 32, 32]);
+          expect(avg(pixel_values.data)).toBeCloseTo(-0.00867417361587286);
+          expect(pixel_values.data[0]).toBeCloseTo(-0.9921568632125854);
+          expect(pixel_values.data[1]).toBeCloseTo(-0.9686274528503418);
+          expect(pixel_values.data[5]).toBeCloseTo(0.0);
+          expect(pixel_values.data[32]).toBeCloseTo(-0.9215686321258545);
+          expect(pixel_values.data[33]).toBeCloseTo(-0.8980392217636108);
+          expect(pixel_values.data.at(-1)).toBeCloseTo(0.0);
+
+          compare(original_sizes, [[5, 3]]);
+          compare(reshaped_input_sizes, [[5, 3]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
 
-                compare(pixel_values.dims, [1, 3, 384, 608]);
-                compare(avg(pixel_values.data), 0.38628831535989555);
+    // BitImageProcessor
+    it(
+      MODELS.dinov2,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.dinov2);
 
-                compare(original_sizes, [[408, 612]]);
-                compare(reshaped_input_sizes, [[384, 608]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+        {
+          const image = await load_image(TEST_IMAGES.tiger);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
 
-        // NougatImageProcessor
-        //  - tests padding after normalization (image_mean != 0.5, image_std != 0.5)
-        it(MODELS.nougat, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.nougat))
+          compare(pixel_values.dims, [1, 3, 224, 224]);
+          compare(avg(pixel_values.data), 0.06262318789958954);
 
-            {
-                const image = await load_image(TEST_IMAGES.paper);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+          compare(original_sizes, [[408, 612]]);
+          compare(reshaped_input_sizes, [[224, 224]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    // DPTImageProcessor
+    //  - tests ensure_multiple_of
+    //  - tests keep_aspect_ratio
+    //  - tests bankers rounding
+    it(
+      MODELS.dpt_2,
+      async () => {
+        const processor = await AutoProcessor.from_pretrained(MODELS.dpt_2);
+
+        {
+          const image = await load_image(TEST_IMAGES.cats);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+
+          compare(pixel_values.dims, [1, 3, 518, 686]);
+          compare(avg(pixel_values.data), 0.30337387323379517);
+
+          compare(original_sizes, [[480, 640]]);
+          compare(reshaped_input_sizes, [[518, 686]]);
+        }
 
-                compare(pixel_values.dims, [1, 3, 896, 672]);
-                compare(avg(pixel_values.data), 1.8447155005897355);
+        {
+          const image = await load_image(TEST_IMAGES.checkerboard_64x32);
+          const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
 
-                compare(original_sizes, [[850, 685]]);
-                compare(reshaped_input_sizes, [[833, 672]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+          // NOTE: without bankers rounding, this would be [1, 3, 266, 518]
+          compare(pixel_values.dims, [1, 3, 252, 518]);
+          compare(avg(pixel_values.data), 0.2267402559518814);
 
-        // OwlViTFeatureExtractor
-        it(MODELS.owlvit, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.owlvit))
-            {
-                const image = await load_image(TEST_IMAGES.cats);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+          compare(original_sizes, [[32, 64]]);
+          compare(reshaped_input_sizes, [[252, 518]]);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    // TODO: Add back
+    // // EfficientNetImageProcessor
+    // //  - tests include_top
+    // it(MODELS.efficientnet, async () => {
+    //     const processor = await AutoProcessor.from_pretrained(MODELS.efficientnet)
+
+    //     {
+    //         const image = await load_image(TEST_IMAGES.cats);
+    //         const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
+
+    //         compare(pixel_values.dims, [1, 3, 224, 224]);
+    //         compare(avg(pixel_values.data), 0.3015307230282871);
+
+    //         compare(original_sizes, [[480, 640]]);
+    //         compare(reshaped_input_sizes, [[224, 224]]);
+    //     }
+    // }, MAX_TEST_EXECUTION_TIME);
+  });
+
+  describe("Audio processors", () => {
+    const audioPromise = new Promise(async (resolve) => {
+      const url = "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/mlk.npy";
+      const buffer = await (await fetch(url)).arrayBuffer();
+      const audio = Float32Array.from(new Float64Array(buffer));
+      resolve(audio);
+    });
 
-                compare(pixel_values.dims, [1, 3, 768, 768]);
-                compare(avg(pixel_values.data), 0.250620447910435);
+    it(
+      "WhisperFeatureExtractor",
+      async () => {
+        const audio = await audioPromise;
+        const processor = await AutoProcessor.from_pretrained("Xenova/whisper-tiny.en");
+        const { input_features } = await processor(audio);
+        compare(input_features.dims, [1, 80, 3000]);
+        expect(avg(input_features.data)).toBeCloseTo(-0.2813588131551941);
+        expect(input_features.data[0]).toBeCloseTo(0.33168578147888184);
+        expect(input_features.data[1]).toBeCloseTo(0.30986475944519043);
+        expect(input_features.data[81]).toBeCloseTo(0.10727232694625854);
+        expect(input_features.data[3001]).toBeCloseTo(0.2555035352706909);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "ASTFeatureExtractor",
+      async () => {
+        const audio = await audioPromise;
+        const processor = await AutoProcessor.from_pretrained("Xenova/ast-finetuned-audioset-10-10-0.4593");
+        {
+          // truncation
+          const { input_values } = await processor(audio);
+          compare(input_values.dims, [1, 1024, 128]);
+
+          expect(avg(input_values.data)).toBeCloseTo(-0.04054912979309085);
+          expect(input_values.data[0]).toBeCloseTo(-0.5662586092948914);
+          expect(input_values.data[1]).toBeCloseTo(-1.0300861597061157);
+          expect(input_values.data[129]).toBeCloseTo(-1.084834098815918);
+          expect(input_values.data[1025]).toBeCloseTo(-1.1204065084457397);
+        }
+        {
+          // padding
+          const { input_values } = await processor(audio.slice(0, 1000));
+          compare(input_values.dims, [1, 1024, 128]); // [1, 4, 128] -> (padded to) -> [1, 1024, 128]
+
+          expect(avg(input_values.data)).toBeCloseTo(0.4647964835166931);
+          expect(input_values.data[0]).toBeCloseTo(-0.5662586092948914);
+          expect(input_values.data[1]).toBeCloseTo(-1.0300861597061157);
+          expect(input_values.data[129]).toBeCloseTo(-1.084834098815918);
+
+          // padded values
+          expect(input_values.data[1025]).toBeCloseTo(0.46703237295150757);
+          expect(input_values.data[2049]).toBeCloseTo(0.46703237295150757);
+          expect(input_values.data[10000]).toBeCloseTo(0.46703237295150757);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "SeamlessM4TFeatureExtractor",
+      async () => {
+        const audio = await audioPromise;
+        const processor = await AutoProcessor.from_pretrained("Xenova/wav2vec2-bert-CV16-en");
+        {
+          // normal
+          const { input_features, attention_mask } = await processor(audio);
+          compare(input_features.dims, [1, 649, 160]);
+          compare(attention_mask.dims, [1, 649]);
+
+          expect(avg(input_features.data)).toBeCloseTo(-2.938903875815413e-8);
+          expect(input_features.data[0]).toBeCloseTo(1.1939343214035034);
+          expect(input_features.data[1]).toBeCloseTo(0.7874255180358887);
+          expect(input_features.data[160]).toBeCloseTo(-0.712975025177002);
+          expect(input_features.data[161]).toBeCloseTo(0.045802414417266846);
+          expect(input_features.data.at(-1)).toBeCloseTo(-1.3328346014022827);
+
+          expect(sum(attention_mask.data)).toEqual(649);
+        }
+        {
+          // padding (pad_to_multiple_of=2)
+          const { input_features, attention_mask } = await processor(audio.slice(0, 10000));
+
+          // [1, 61, 80] -> [1, 62, 80] -> [1, 31, 160]
+          compare(input_features.dims, [1, 31, 160]);
+          compare(attention_mask.dims, [1, 31]);
+
+          expect(avg(input_features.data)).toBeCloseTo(0.01612919569015503);
+          expect(input_features.data[0]).toBeCloseTo(0.9657132029533386);
+          expect(input_features.data[1]).toBeCloseTo(0.12912897765636444);
+          expect(input_features.data[160]).toBeCloseTo(-1.2364212274551392);
+          expect(input_features.data[161]).toBeCloseTo(-0.9703778028488159);
+          expect(input_features.data.at(-1)).toBeCloseTo(1); // padding value
+
+          expect(sum(attention_mask.data)).toEqual(30);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "ClapFeatureExtractor",
+      async () => {
+        const audio = await audioPromise;
+        const processor = await AutoProcessor.from_pretrained("Xenova/clap-htsat-unfused");
+        {
+          // truncation
+          // Since truncation uses a random strategy, we override
+          // Math.random to ensure that the test is deterministic
+          const originalRandom = Math.random;
+          Math.random = () => 0.5;
+
+          let long_audio = new Float32Array(500000);
+          long_audio.set(audio);
+          long_audio.set(audio, long_audio.length - audio.length);
+
+          const { input_features } = await processor(long_audio);
+          compare(input_features.dims, [1, 1, 1001, 64]);
+
+          expect(avg(input_features.data)).toBeCloseTo(-37.94569396972656);
+          expect(input_features.data[0]).toBeCloseTo(-53.32647705078125);
+          expect(input_features.data[1]).toBeCloseTo(-47.76755142211914);
+          expect(input_features.data[65]).toBeCloseTo(-36.32261276245117);
+          expect(input_features.data[1002]).toBeCloseTo(-28.0314884185791);
+          expect(input_features.data[10000]).toBeCloseTo(-21.905902862548828);
+          expect(input_features.data[60000]).toBeCloseTo(-14.877863883972168);
+          expect(input_features.data[64062]).toBeCloseTo(-37.9784049987793);
+          expect(input_features.data[64063]).toBeCloseTo(-37.73963928222656);
+
+          // Reset Math.random
+          Math.random = originalRandom;
+        }
+        {
+          // padding
+          const { input_features } = await processor(audio);
+          compare(input_features.dims, [1, 1, 1001, 64]);
+
+          expect(avg(input_features.data)).toBeCloseTo(-34.99049377441406);
+          expect(input_features.data[0]).toBeCloseTo(-21.32573890686035);
+          expect(input_features.data[1]).toBeCloseTo(-26.168411254882812);
+          expect(input_features.data[65]).toBeCloseTo(-29.716018676757812);
+          expect(input_features.data[1002]).toBeCloseTo(-32.16273498535156);
+          expect(input_features.data[10000]).toBeCloseTo(-19.9283390045166);
+
+          // padded values
+          expect(input_features.data[60000]).toBeCloseTo(-100.0);
+          expect(input_features.data[64062]).toBeCloseTo(-100.0);
+          expect(input_features.data[64063]).toBeCloseTo(-100.0);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "WeSpeakerFeatureExtractor",
+      async () => {
+        const processor = await AutoProcessor.from_pretrained("onnx-community/wespeaker-voxceleb-resnet34-LM");
+        {
+          // default
+          const audio = new Float32Array(16000).map((_, i) => Math.sin(i / 100));
+          const { input_features } = await processor(audio);
+          compare(input_features.dims, [1, 98, 80]);
+
+          expect(avg(input_features.data)).toBeCloseTo(5.461731689138105e-8);
+          expect(input_features.data[0]).toBeCloseTo(-0.19300270080566406);
+          expect(input_features.data[1]).toBeCloseTo(-0.05825042724609375);
+          expect(input_features.data[78]).toBeCloseTo(0.2683420181274414);
+          expect(input_features.data[79]).toBeCloseTo(0.26250171661376953);
+          expect(input_features.data[80]).toBeCloseTo(0.19062232971191406);
+          expect(input_features.data.at(-2)).toBeCloseTo(-0.43694400787353516);
+          expect(input_features.data.at(-1)).toBeCloseTo(-0.4266204833984375);
+        }
 
-                compare(original_sizes, [[480, 640]]);
-                compare(reshaped_input_sizes, [[768, 768]]);
-            }
+        {
+          // pad to `min_num_frames`
+          const audio = new Float32Array(3).map((_, i) => Math.sin(i / 100));
+          const { input_features } = await processor(audio);
+          compare(input_features.dims, [1, 9, 80]);
+
+          expect(avg(input_features.data)).toBeCloseTo(-0.0000010093053181966146);
+          expect(input_features.data[0]).toBeCloseTo(20.761859893798828);
+          expect(input_features.data[1]).toBeCloseTo(21.02924346923828);
+          expect(input_features.data[78]).toBeCloseTo(19.083993911743164);
+          expect(input_features.data[79]).toBeCloseTo(18.003454208374023);
+          expect(input_features.data[80]).toBeCloseTo(-2.595233917236328);
+          expect(input_features.data.at(-2)).toBeCloseTo(-2.385499954223633);
+          expect(input_features.data.at(-1)).toBeCloseTo(-2.2504329681396484);
+        }
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Other processors", () => {
+    describe(
+      "FlorenceProcessor",
+      () => {
+        /** @type {import('../src/processors.js').Florence2Processor} */
+        let processor;
+        let images = {};
+
+        beforeAll(async () => {
+          processor = await AutoProcessor.from_pretrained(MODELS.florence2);
+          images = {
+            beetle: await load_image(TEST_IMAGES.beetle),
+            book_cover: await load_image(TEST_IMAGES.book_cover),
+          };
         });
 
-        // CLIPFeatureExtractor
-        //  - tests center crop (do_center_crop=true, crop_size=224)
-        it(MODELS.clip, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.clip))
+        describe("Prompt construction", () => {
+          it("Construct prompt", async () => {
+            const text = "<OD>";
+            const prompts = processor.construct_prompts(text);
+            const target = ["Locate the objects with category name in the image."];
+            compare(prompts, target);
+          });
+
+          it("Construct prompts", async () => {
+            const texts = ["<MORE_DETAILED_CAPTION>", "Locate the objects with category name in the image.", "<OPEN_VOCABULARY_DETECTION>cat"];
+            const prompts = processor.construct_prompts(texts);
+            const target = ["Describe with a paragraph what is shown in the image.", "Locate the objects with category name in the image.", "Locate cat in the image."];
+            compare(prompts, target);
+          });
+        });
 
+        describe("Post-process generation", () => {
+          const TESTS = [
             {
-                const image = await load_image(TEST_IMAGES.tiger);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
-
-                compare(pixel_values.dims, [1, 3, 224, 224]);
-                compare(avg(pixel_values.data), -0.06678297738282096);
-
-                compare(original_sizes, [[408, 612]]);
-                compare(reshaped_input_sizes, [[224, 224]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
-
-        // VitMatteImageProcessor
-        //  - tests custom overrides
-        //  - tests multiple inputs
-        //  - tests `size_divisibility` and no size (size_divisibility=32)
-        //  - tests do_pad and `size_divisibility`
-        it(MODELS.vitmatte, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.vitmatte))
-
+              task: "<CAPTION>",
+              generated_text: "</s><s>A green car parked in front of a yellow building.</s>",
+              target: { "<CAPTION>": "A green car parked in front of a yellow building." },
+              image: "beetle",
+            },
             {
-                const image = await load_image(TEST_IMAGES.vitmatte_image);
-                const image2 = await load_image(TEST_IMAGES.vitmatte_trimap);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image, image2);
-
-                compare(pixel_values.dims, [1, 4, 640, 960]);
-                expect(avg(pixel_values.data)).toBeCloseTo(-0.4028555154800415);
-                expect(pixel_values.data[0]).toBeCloseTo(-0.9921568632125854);
-                expect(pixel_values.data[1]).toBeCloseTo(-0.9921568632125854);
-                expect(pixel_values.data[5]).toBeCloseTo(-1.0);
-                expect(pixel_values.data[640]).toBeCloseTo(-0.6784313917160034);
-                expect(pixel_values.data[641]).toBeCloseTo(-0.6705882549285889);
-                expect(pixel_values.data[640 * 960]).toBeCloseTo(-1.0);
-                expect(pixel_values.data[640 * 960 + 1]).toBeCloseTo(-1.0);
-                expect(pixel_values.data.at(-1)).toBeCloseTo(0.0);
-
-                compare(original_sizes, [[640, 960]]);
-                compare(reshaped_input_sizes, [[640, 960]]);
-            }
-
-
+              task: "<DETAILED_CAPTION>",
+              generated_text: "</s><s>The image shows a green Volkswagen Beetle parked in front of a yellow building with two brown doors. The sky is a mix of blue and white, and there are a few green trees in the background.</s>",
+              target: { "<DETAILED_CAPTION>": "The image shows a green Volkswagen Beetle parked in front of a yellow building with two brown doors. The sky is a mix of blue and white, and there are a few green trees in the background." },
+              image: "beetle",
+            },
             {
-                const image = await load_image(TEST_IMAGES.pattern_3x5);
-                const image2 = await load_image(TEST_IMAGES.pattern_3x5);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image, image2);
-
-                compare(pixel_values.dims, [1, 4, 32, 32]);
-                expect(avg(pixel_values.data)).toBeCloseTo(-0.00867417361587286);
-                expect(pixel_values.data[0]).toBeCloseTo(-0.9921568632125854);
-                expect(pixel_values.data[1]).toBeCloseTo(-0.9686274528503418);
-                expect(pixel_values.data[5]).toBeCloseTo(0.0);
-                expect(pixel_values.data[32]).toBeCloseTo(-0.9215686321258545);
-                expect(pixel_values.data[33]).toBeCloseTo(-0.8980392217636108);
-                expect(pixel_values.data.at(-1)).toBeCloseTo(0.0);
-
-                compare(original_sizes, [[5, 3]]);
-                compare(reshaped_input_sizes, [[5, 3]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
-
-        // BitImageProcessor
-        it(MODELS.dinov2, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.dinov2))
-
+              task: "<MORE_DETAILED_CAPTION>",
+              generated_text: "</s><s>The image shows a vintage Volkswagen Beetle car parked on a cobblestone street in front of a yellow building with two wooden doors. The car is painted in a bright turquoise color and has a white stripe running along the side. It has two doors on either side of the car, one on top of the other, and a small window on the front. The building appears to be old and dilapidated, with peeling paint and crumbling walls. The sky is blue and there are trees in the background.</s>",
+              target: { "<MORE_DETAILED_CAPTION>": "The image shows a vintage Volkswagen Beetle car parked on a cobblestone street in front of a yellow building with two wooden doors. The car is painted in a bright turquoise color and has a white stripe running along the side. It has two doors on either side of the car, one on top of the other, and a small window on the front. The building appears to be old and dilapidated, with peeling paint and crumbling walls. The sky is blue and there are trees in the background." },
+              image: "beetle",
+            },
             {
-                const image = await load_image(TEST_IMAGES.tiger);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
-
-                compare(pixel_values.dims, [1, 3, 224, 224]);
-                compare(avg(pixel_values.data), 0.06262318789958954);
-
-                compare(original_sizes, [[408, 612]]);
-                compare(reshaped_input_sizes, [[224, 224]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
-
-        // DPTImageProcessor
-        //  - tests ensure_multiple_of
-        //  - tests keep_aspect_ratio
-        //  - tests bankers rounding
-        it(MODELS.dpt_2, async () => {
-            const processor = await AutoProcessor.from_pretrained(m(MODELS.dpt_2))
-
+              task: "<OD>",
+              generated_text: "</s><s><s><s>car<loc_53><loc_333><loc_933><loc_774>door<loc_712><loc_203><loc_906><loc_545>wheel<loc_704><loc_576><loc_866><loc_772><loc_149><loc_584><loc_310><loc_773></s>",
+              target: {
+                "<OD>": {
+                  bboxes: [
+                    [34.24, 160.08, 597.44, 371.76],
+                    [456.0, 97.68, 580.16, 261.84],
+                    [450.88, 276.72, 554.56, 370.8],
+                    [95.68, 280.56, 198.72, 371.28],
+                  ],
+                  labels: ["car", "door", "wheel", "wheel"],
+                },
+              },
+              image: "beetle",
+            },
             {
-                const image = await load_image(TEST_IMAGES.cats);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
-
-                compare(pixel_values.dims, [1, 3, 518, 686]);
-                compare(avg(pixel_values.data), 0.30337387323379517);
-
-                compare(original_sizes, [[480, 640]]);
-                compare(reshaped_input_sizes, [[518, 686]]);
-            }
-
+              task: "<DENSE_REGION_CAPTION>",
+              generated_text: "</s><s>turquoise Volkswagen Beetle<loc_52><loc_333><loc_932><loc_774>wheel<loc_704><loc_576><loc_864><loc_772><loc_148><loc_584><loc_308><loc_773></s>",
+              target: {
+                "<DENSE_REGION_CAPTION>": {
+                  bboxes: [
+                    [33.6, 160.08, 596.8, 371.76],
+                    [450.88, 276.72, 553.28, 370.8],
+                    [95.04, 280.56, 197.44, 371.28],
+                  ],
+                  labels: ["turquoise Volkswagen Beetle", "wheel", "wheel"],
+                },
+              },
+              image: "beetle",
+            },
             {
-                const image = await load_image(TEST_IMAGES.checkerboard_64x32);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
-
-                // NOTE: without bankers rounding, this would be [1, 3, 266, 518]
-                compare(pixel_values.dims, [1, 3, 252, 518]);
-                compare(avg(pixel_values.data), 0.2267402559518814);
-
-                compare(original_sizes, [[32, 64]]);
-                compare(reshaped_input_sizes, [[252, 518]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
-
-        // EfficientNetImageProcessor
-        //  - tests include_top
-        it(MODELS.efficientnet, async () => {
-            const processor = await AutoProcessor.from_pretrained(MODELS.efficientnet)
-
+              task: "<REGION_PROPOSAL>",
+              generated_text: "</s><s><s><s><loc_52><loc_333><loc_932><loc_774><loc_711><loc_203><loc_905><loc_545><loc_704><loc_576><loc_864><loc_772><loc_148><loc_584><loc_309><loc_773><loc_354><loc_184><loc_519><loc_342><loc_102><loc_555><loc_135><loc_616><loc_424><loc_503><loc_472><loc_514><loc_637><loc_642><loc_646><loc_668></s>",
+              target: {
+                "<REGION_PROPOSAL>": {
+                  bboxes: [
+                    [33.6, 160.08, 596.8, 371.76],
+                    [455.36, 97.68, 579.52, 261.84],
+                    [450.88, 276.72, 553.28, 370.8],
+                    [95.04, 280.56, 198.08, 371.28],
+                    [226.88, 88.56, 332.48, 164.4],
+                    [65.6, 266.64, 86.72, 295.92],
+                    [271.68, 241.68, 302.4, 246.96],
+                    [408.0, 308.4, 413.76, 320.88],
+                  ],
+                  labels: ["", "", "", "", "", "", "", ""],
+                },
+              },
+              image: "beetle",
+            },
             {
-                const image = await load_image(TEST_IMAGES.cats);
-                const { pixel_values, original_sizes, reshaped_input_sizes } = await processor(image);
-
-                compare(pixel_values.dims, [1, 3, 224, 224]);
-                compare(avg(pixel_values.data), 0.3015307230282871);
-
-                compare(original_sizes, [[480, 640]]);
-                compare(reshaped_input_sizes, [[224, 224]]);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
-    });
-
-    describe('Audio processors', () => {
-        const audioPromise = new Promise(async (resolve) => {
-            const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/mlk.npy';
-            const buffer = await (await fetch(url)).arrayBuffer();
-            const audio = Float32Array.from(new Float64Array(buffer));
-            resolve(audio);
+              task: "<CAPTION_TO_PHRASE_GROUNDING>",
+              text_input: "A green car parked in front of a yellow building.",
+              generated_text: "</s><s><s><s>A green car<loc_54><loc_330><loc_911><loc_780>a yellow building<loc_0><loc_8><loc_998><loc_635></s>",
+              target: {
+                "<CAPTION_TO_PHRASE_GROUNDING>": {
+                  bboxes: [
+                    [34.88, 158.64, 583.36, 374.64],
+                    [0.32, 4.08, 639.04, 305.04],
+                  ],
+                  labels: ["A green car", "a yellow building"],
+                },
+              },
+              image: "beetle",
+            },
+            // {
+            //     task: "<REFERRING_EXPRESSION_SEGMENTATION>",
+            //     text_input: "a green car",
+            //     generated_text: "</s><s><s><s><loc_279><loc_378><loc_282><loc_376><loc_285><loc_376><loc_293><loc_370><loc_296><loc_370><loc_301><loc_366><loc_304><loc_366><loc_309><loc_362><loc_313><loc_360><loc_318><loc_358><loc_323><loc_355><loc_327><loc_353><loc_334><loc_351><loc_340><loc_349><loc_346><loc_347><loc_353><loc_345><loc_360><loc_343><loc_370><loc_341><loc_381><loc_339><loc_395><loc_337><loc_414><loc_335><loc_486><loc_335><loc_514><loc_337><loc_528><loc_339><loc_539><loc_341><loc_547><loc_343><loc_553><loc_345><loc_560><loc_347><loc_566><loc_349><loc_572><loc_351><loc_578><loc_353><loc_583><loc_355><loc_586><loc_358><loc_589><loc_362><loc_592><loc_368><loc_594><loc_374><loc_597><loc_378><loc_600><loc_385><loc_603><loc_391><loc_605><loc_397><loc_608><loc_401><loc_609><loc_408><loc_612><loc_414><loc_616><loc_420><loc_619><loc_426><loc_622><loc_433><loc_630><loc_443><loc_634><loc_445><loc_639><loc_451><loc_644><loc_458><loc_674><loc_458><loc_675><loc_460><loc_691><loc_462><loc_713><loc_462><loc_727><loc_464><loc_738><loc_466><loc_747><loc_468><loc_757><loc_470><loc_765><loc_472><loc_771><loc_474><loc_777><loc_476><loc_783><loc_478><loc_788><loc_481><loc_793><loc_483><loc_797><loc_485><loc_802><loc_487><loc_807><loc_491><loc_810><loc_491><loc_818><loc_497><loc_821><loc_497><loc_824><loc_499><loc_827><loc_503><loc_832><loc_505><loc_837><loc_510><loc_841><loc_516><loc_846><loc_520><loc_852><loc_524><loc_857><loc_526><loc_860><loc_526><loc_865><loc_528><loc_869><loc_532><loc_872><loc_532><loc_882><loc_539><loc_885><loc_543><loc_888><loc_543><loc_891><loc_545><loc_894><loc_549><loc_896><loc_553><loc_897><loc_559><loc_897><loc_566><loc_896><loc_568><loc_894><loc_574><loc_894><loc_582><loc_896><loc_595><loc_897><loc_597><loc_899><loc_603><loc_900><loc_609><loc_902><loc_622><loc_902><loc_628><loc_900><loc_630><loc_899><loc_647><loc_899><loc_651><loc_900><loc_653><loc_902><loc_659><loc_902><loc_668><loc_897><loc_670><loc_888><loc_672><loc_874><loc_672><loc_865><loc_674><loc_863><loc_693><loc_862><loc_701><loc_860><loc_707><loc_859><loc_714><loc_857><loc_718><loc_854><loc_722><loc_852><loc_728><loc_849><loc_734><loc_846><loc_741><loc_835><loc_755><loc_830><loc_759><loc_821><loc_766><loc_816><loc_768><loc_810><loc_770><loc_774><loc_770><loc_765><loc_768><loc_760><loc_766><loc_755><loc_764><loc_749><loc_759><loc_744><loc_755><loc_738><loc_749><loc_727><loc_734><loc_724><loc_728><loc_721><loc_722><loc_719><loc_718><loc_719><loc_714><loc_716><loc_707><loc_715><loc_701><loc_715><loc_697><loc_713><loc_693><loc_710><loc_689><loc_707><loc_691><loc_700><loc_701><loc_697><loc_703><loc_666><loc_701><loc_663><loc_701><loc_661><loc_703><loc_657><loc_705><loc_647><loc_707><loc_644><loc_707><loc_642><loc_705><loc_594><loc_703><loc_339><loc_703><loc_337><loc_705><loc_329><loc_707><loc_323><loc_707><loc_318><loc_705><loc_315><loc_703><loc_312><loc_699><loc_309><loc_697><loc_304><loc_697><loc_301><loc_701><loc_299><loc_705><loc_299><loc_709><loc_298><loc_714><loc_295><loc_718><loc_293><loc_724><loc_290><loc_728><loc_288><loc_734><loc_285><loc_741><loc_276><loc_753><loc_271><loc_757><loc_266><loc_761><loc_260><loc_766><loc_255><loc_768><loc_251><loc_770><loc_240><loc_772><loc_205><loc_772><loc_199><loc_770><loc_194><loc_768><loc_185><loc_761><loc_180><loc_757><loc_174><loc_751><loc_166><loc_741><loc_163><loc_734><loc_161><loc_728><loc_158><loc_724><loc_157><loc_720><loc_155><loc_714><loc_155><loc_707><loc_154><loc_703><loc_149><loc_697><loc_146><loc_695><loc_135><loc_695><loc_125><loc_697><loc_124><loc_699><loc_116><loc_701><loc_103><loc_701><loc_99><loc_697><loc_83><loc_697><loc_78><loc_695><loc_75><loc_691><loc_75><loc_684><loc_78><loc_680><loc_80><loc_676><loc_80><loc_672><loc_69><loc_670><loc_63><loc_668><loc_60><loc_666><loc_58><loc_661><loc_56><loc_653><loc_56><loc_639><loc_60><loc_634><loc_66><loc_632><loc_72><loc_630><loc_86><loc_628><loc_102><loc_628><loc_105><loc_626><loc_108><loc_622><loc_110><loc_618><loc_110><loc_609><loc_108><loc_607><loc_107><loc_601><loc_105><loc_593><loc_105><loc_576><loc_107><loc_570><loc_108><loc_566><loc_113><loc_559><loc_116><loc_557><loc_121><loc_555><loc_124><loc_555><loc_127><loc_551><loc_125><loc_543><loc_127><loc_539><loc_130><loc_534><loc_138><loc_534><loc_141><loc_532><loc_144><loc_528><loc_144><loc_526><loc_152><loc_514><loc_179><loc_478><loc_183><loc_472><loc_191><loc_464><loc_196><loc_460><loc_197><loc_460><loc_202><loc_456><loc_208><loc_449><loc_216><loc_441><loc_224><loc_433><loc_233><loc_420><loc_240><loc_414><loc_241><loc_414><loc_246><loc_410><loc_254><loc_401><loc_263><loc_389><loc_268><loc_385><loc_276><loc_381><loc_279><loc_376></s>",
+            //     target: {
+            //         '<REFERRING_EXPRESSION_SEGMENTATION>': {
+            //             polygons: [[[[178.88, 181.68, 180.8, 180.72, 182.72, 180.72, 187.84, 177.84, 189.76, 177.84, 192.96, 175.92, 194.88, 175.92, 198.08, 174, 200.64, 173.04, 203.84, 172.08, 207.04, 170.64, 209.6, 169.68, 214.08, 168.72, 217.92, 167.76, 221.76, 166.8, 226.24, 165.84, 230.72, 164.88, 237.12, 163.92, 244.16, 162.96, 253.12, 162, 265.28, 161.04, 311.36, 161.04, 329.28, 162, 338.24, 162.96, 345.28, 163.92, 350.4, 164.88, 354.24, 165.84, 358.72, 166.8, 362.56, 167.76, 366.4, 168.72, 370.24, 169.68, 373.44, 170.64, 375.36, 172.08, 377.28, 174, 379.2, 176.88, 380.48, 179.76, 382.4, 181.68, 384.32, 185.04, 386.24, 187.92, 387.52, 190.8, 389.44, 192.72, 390.08, 196.08, 392, 198.96, 394.56, 201.84, 396.48, 204.72, 398.4, 208.08, 403.52, 212.88, 406.08, 213.84, 409.28, 216.72, 412.48, 220.08, 431.68, 220.08, 432.32, 221.04, 442.56, 222, 456.64, 222, 465.6, 222.96, 472.64, 223.92, 478.4, 224.88, 484.8, 225.84, 489.92, 226.8, 493.76, 227.76, 497.6, 228.72, 501.44, 229.68, 504.64, 231.12, 507.84, 232.08, 510.4, 233.04, 513.6, 234, 516.8, 235.92, 518.72, 235.92, 523.84, 238.8, 525.76, 238.8, 527.68, 239.76, 529.6, 241.68, 532.8, 242.64, 536, 245.04, 538.56, 247.92, 541.76, 249.84, 545.6, 251.76, 548.8, 252.72, 550.72, 252.72, 553.92, 253.68, 556.48, 255.6, 558.4, 255.6, 564.8, 258.96, 566.72, 260.88, 568.64, 260.88, 570.56, 261.84, 572.48, 263.76, 573.76, 265.68, 574.4, 268.56, 574.4, 271.92, 573.76, 272.88, 572.48, 275.76, 572.48, 279.6, 573.76, 285.84, 574.4, 286.8, 575.68, 289.68, 576.32, 292.56, 577.6, 298.8, 577.6, 301.68, 576.32, 302.64, 575.68, 310.8, 575.68, 312.72, 576.32, 313.68, 577.6, 316.56, 577.6, 320.88, 574.4, 321.84, 568.64, 322.8, 559.68, 322.8, 553.92, 323.76, 552.64, 332.88, 552, 336.72, 550.72, 339.6, 550.08, 342.96, 548.8, 344.88, 546.88, 346.8, 545.6, 349.68, 543.68, 352.56, 541.76, 355.92, 534.72, 362.64, 531.52, 364.56, 525.76, 367.92, 522.56, 368.88, 518.72, 369.84, 495.68, 369.84, 489.92, 368.88, 486.72, 367.92, 483.52, 366.96, 479.68, 364.56, 476.48, 362.64, 472.64, 359.76, 465.6, 352.56, 463.68, 349.68, 461.76, 346.8, 460.48, 344.88, 460.48, 342.96, 458.56, 339.6, 457.92, 336.72, 457.92, 334.8, 456.64, 332.88, 454.72, 330.96, 452.8, 331.92, 448.32, 336.72, 446.4, 337.68, 426.56, 336.72, 424.64, 336.72, 423.36, 337.68, 420.8, 338.64, 414.4, 339.6, 412.48, 339.6, 411.2, 338.64, 380.48, 337.68, 217.28, 337.68, 216, 338.64, 210.88, 339.6, 207.04, 339.6, 203.84, 338.64, 201.92, 337.68, 200, 335.76, 198.08, 334.8, 194.88, 334.8, 192.96, 336.72, 191.68, 338.64, 191.68, 340.56, 191.04, 342.96, 189.12, 344.88, 187.84, 347.76, 185.92, 349.68, 184.64, 352.56, 182.72, 355.92, 176.96, 361.68, 173.76, 363.6, 170.56, 365.52, 166.72, 367.92, 163.52, 368.88, 160.96, 369.84, 153.92, 370.8, 131.52, 370.8, 127.68, 369.84, 124.48, 368.88, 118.72, 365.52, 115.52, 363.6, 111.68, 360.72, 106.56, 355.92, 104.64, 352.56, 103.36, 349.68, 101.44, 347.76, 100.8, 345.84, 99.52, 342.96, 99.52, 339.6, 98.88, 337.68, 95.68, 334.8, 93.76, 333.84, 86.72, 333.84, 80.32, 334.8, 79.68, 335.76, 74.56, 336.72, 66.24, 336.72, 63.68, 334.8, 53.44, 334.8, 50.24, 333.84, 48.32, 331.92, 48.32, 328.56, 50.24, 326.64, 51.52, 324.72, 51.52, 322.8, 44.48, 321.84, 40.64, 320.88, 38.72, 319.92, 37.44, 317.52, 36.16, 313.68, 36.16, 306.96, 38.72, 304.56, 42.56, 303.6, 46.4, 302.64, 55.36, 301.68, 65.6, 301.68, 67.52, 300.72, 69.44, 298.8, 70.72, 296.88, 70.72, 292.56, 69.44, 291.6, 68.8, 288.72, 67.52, 284.88, 67.52, 276.72, 68.8, 273.84, 69.44, 271.92, 72.64, 268.56, 74.56, 267.6, 77.76, 266.64, 79.68, 266.64, 81.6, 264.72, 80.32, 260.88, 81.6, 258.96, 83.52, 256.56, 88.64, 256.56, 90.56, 255.6, 92.48, 253.68, 92.48, 252.72, 97.6, 246.96, 114.88, 229.68, 117.44, 226.8, 122.56, 222.96, 125.76, 221.04, 126.4, 221.04, 129.6, 219.12, 133.44, 215.76, 138.56, 211.92, 143.68, 208.08, 149.44, 201.84, 153.92, 198.96, 154.56, 198.96, 157.76, 197.04, 162.88, 192.72, 168.64, 186.96, 171.84, 185.04, 176.96, 183.12, 178.88, 180.72]]]],
+            //             labels: [''],
+            //         }
+            //     },
+            //     image: 'beetle',
+            // },
+            // {
+            //     task: "<REGION_TO_SEGMENTATION>",
+            //     text_input: "<loc_702><loc_575><loc_866><loc_772>",
+            //     generated_text: "</s><s><s><s><loc_734><loc_600><loc_740><loc_594><loc_745><loc_590><loc_748><loc_588><loc_751><loc_588><loc_756><loc_584><loc_760><loc_582><loc_765><loc_580><loc_773><loc_578><loc_800><loc_578><loc_804><loc_580><loc_809><loc_582><loc_814><loc_586><loc_817><loc_586><loc_820><loc_590><loc_825><loc_592><loc_829><loc_596><loc_834><loc_600><loc_848><loc_619><loc_851><loc_625><loc_854><loc_631><loc_859><loc_644><loc_861><loc_650><loc_862><loc_656><loc_864><loc_665><loc_864><loc_692><loc_862><loc_702><loc_861><loc_708><loc_859><loc_715><loc_856><loc_723><loc_853><loc_729><loc_850><loc_735><loc_845><loc_744><loc_839><loc_752><loc_831><loc_760><loc_826><loc_764><loc_823><loc_766><loc_818><loc_768><loc_814><loc_770><loc_806><loc_773><loc_782><loc_773><loc_768><loc_770><loc_762><loc_768><loc_757><loc_766><loc_748><loc_760><loc_743><loc_756><loc_737><loc_750><loc_726><loc_735><loc_723><loc_729><loc_720><loc_723><loc_718><loc_719><loc_718><loc_715><loc_715><loc_708><loc_713><loc_702><loc_712><loc_696><loc_710><loc_688><loc_710><loc_658><loc_712><loc_648><loc_713><loc_640><loc_715><loc_633><loc_718><loc_627><loc_718><loc_623><loc_720><loc_619><loc_723><loc_613></s>",
+            //     target: {
+            //         '<REGION_TO_SEGMENTATION>': {
+            //             polygons: [[[[470.08, 288.24, 473.92, 285.36, 477.12, 283.44, 479.04, 282.48, 480.96, 282.48, 484.16, 280.56, 486.72, 279.6, 489.92, 278.64, 495.04, 277.68, 512.32, 277.68, 514.88, 278.64, 518.08, 279.6, 521.28, 281.52, 523.2, 281.52, 525.12, 283.44, 528.32, 284.4, 530.88, 286.32, 534.08, 288.24, 543.04, 297.36, 544.96, 300.24, 546.88, 303.12, 550.08, 309.36, 551.36, 312.24, 552, 315.12, 553.28, 319.44, 553.28, 332.4, 552, 337.2, 551.36, 340.08, 550.08, 343.44, 548.16, 347.28, 546.24, 350.16, 544.32, 353.04, 541.12, 357.36, 537.28, 361.2, 532.16, 365.04, 528.96, 366.96, 527.04, 367.92, 523.84, 368.88, 521.28, 369.84, 516.16, 371.28, 500.8, 371.28, 491.84, 369.84, 488, 368.88, 484.8, 367.92, 479.04, 365.04, 475.84, 363.12, 472, 360.24, 464.96, 353.04, 463.04, 350.16, 461.12, 347.28, 459.84, 345.36, 459.84, 343.44, 457.92, 340.08, 456.64, 337.2, 456, 334.32, 454.72, 330.48, 454.72, 316.08, 456, 311.28, 456.64, 307.44, 457.92, 304.08, 459.84, 301.2, 459.84, 299.28, 461.12, 297.36, 463.04, 294.48]]]],
+            //             labels: [''],
+            //         }
+            //     },
+            //     image: 'beetle',
+            // },
+            // {
+            //     task: "<OPEN_VOCABULARY_DETECTION>",
+            //     text_input: "a green car",
+            //     generated_text: "</s><s><s>a green car<loc_53><loc_330><loc_910><loc_779></s>",
+            //     target: {
+            //         '<OPEN_VOCABULARY_DETECTION>': {
+            //             bboxes: [[34.24, 158.64, 582.72, 374.16]],
+            //             bboxes_labels: ['a green car'],
+            //             polygons: [],
+            //             polygons_labels: [],
+            //         }
+            //     },
+            //     image: 'beetle',
+            // },
+            {
+              task: "<REGION_TO_CATEGORY>",
+              text_input: "<loc_52><loc_332><loc_932><loc_774>",
+              generated_text: "</s><s>car<loc_52><loc_332><loc_932><loc_774></s>",
+              target: { "<REGION_TO_CATEGORY>": "car<loc_52><loc_332><loc_932><loc_774>" },
+              image: "beetle",
+            },
+            {
+              task: "<REGION_TO_DESCRIPTION>",
+              text_input: "<loc_52><loc_332><loc_932><loc_774>",
+              generated_text: "</s><s>turquoise Volkswagen Beetle<loc_52><loc_332><loc_932><loc_774></s>",
+              target: { "<REGION_TO_DESCRIPTION>": "turquoise Volkswagen Beetle<loc_52><loc_332><loc_932><loc_774>" },
+              image: "beetle",
+            },
+            {
+              task: "<OCR>",
+              generated_text: "</s><s>CUDAFOR ENGINEERSAn Introduction to High-PerformanceParallel ComputingDUANE STORTIMETE YURTOGLU</s>",
+              target: { "<OCR>": "CUDAFOR ENGINEERSAn Introduction to High-PerformanceParallel ComputingDUANE STORTIMETE YURTOGLU" },
+              image: "book_cover",
+            },
+            {
+              task: "<OCR_WITH_REGION>",
+              generated_text: "</s><s><s><s>CUDA<loc_414><loc_100><loc_932><loc_100><loc_932><loc_229><loc_414><loc_229>FOR ENGINEERS<loc_359><loc_241><loc_932><loc_241><loc_932><loc_298><loc_359><loc_298>An Introduction to High-Performance<loc_287><loc_330><loc_934><loc_332><loc_934><loc_368><loc_287><loc_366>Parallel Computing<loc_595><loc_368><loc_934><loc_372><loc_934><loc_408><loc_595><loc_404>DUANE STORTI<loc_660><loc_882><loc_934><loc_882><loc_934><loc_912><loc_660><loc_912>METE YURTOGLU<loc_625><loc_920><loc_934><loc_920><loc_934><loc_950><loc_625><loc_950></s>",
+              target: {
+                "<OCR_WITH_REGION>": {
+                  quad_boxes: [
+                    [167.0435028076172, 50.25, 375.7974853515625, 50.25, 375.7974853515625, 114.75, 167.0435028076172, 114.75],
+                    [144.8784942626953, 120.75, 375.7974853515625, 120.75, 375.7974853515625, 149.25, 144.8784942626953, 149.25],
+                    [115.86249542236328, 165.25, 376.6034851074219, 166.25, 376.6034851074219, 184.25, 115.86249542236328, 183.25],
+                    [239.9864959716797, 184.25, 376.6034851074219, 186.25, 376.6034851074219, 204.25, 239.9864959716797, 202.25],
+                    [266.1814880371094, 441.25, 376.6034851074219, 441.25, 376.6034851074219, 456.25, 266.1814880371094, 456.25],
+                    [252.0764923095703, 460.25, 376.6034851074219, 460.25, 376.6034851074219, 475.25, 252.0764923095703, 475.25],
+                  ],
+
+                  // NOTE: Python version has a bug here, it should be "CUDA" instead of "</s>CUDA"
+                  labels: [/* '</s>CUDA' */ "CUDA", "FOR ENGINEERS", "An Introduction to High-Performance", "Parallel Computing", "DUANE STORTI", "METE YURTOGLU"],
+                },
+              },
+              image: "book_cover",
+            },
+          ];
+
+          for (const { task, generated_text, target, image } of TESTS) {
+            it(task, () => {
+              const result = processor.post_process_generation(generated_text, task, images[image].size);
+              compare(result, target);
+            });
+          }
         });
-
-        it('WhisperFeatureExtractor', async () => {
-            const audio = await audioPromise;
-            const processor = await AutoProcessor.from_pretrained('Xenova/whisper-tiny.en');
-            const { input_features } = await processor(audio);
-            compare(input_features.dims, [1, 80, 3000]);
-            expect(avg(input_features.data)).toBeCloseTo(-0.2813588131551941);
-            expect(input_features.data[0]).toBeCloseTo(0.33168578147888184);
-            expect(input_features.data[1]).toBeCloseTo(0.30986475944519043);
-            expect(input_features.data[81]).toBeCloseTo(0.10727232694625854);
-            expect(input_features.data[3001]).toBeCloseTo(0.2555035352706909);
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it('ASTFeatureExtractor', async () => {
-            const audio = await audioPromise;
-            const processor = await AutoProcessor.from_pretrained('Xenova/ast-finetuned-audioset-10-10-0.4593');
-            { // truncation
-                const { input_values } = await processor(audio);
-                compare(input_values.dims, [1, 1024, 128]);
-
-                expect(avg(input_values.data)).toBeCloseTo(-0.04054912979309085);
-                expect(input_values.data[0]).toBeCloseTo(-0.5662586092948914);
-                expect(input_values.data[1]).toBeCloseTo(-1.0300861597061157);
-                expect(input_values.data[129]).toBeCloseTo(-1.084834098815918);
-                expect(input_values.data[1025]).toBeCloseTo(-1.1204065084457397);
-            }
-            { // padding
-                const { input_values } = await processor(audio.slice(0, 1000));
-                compare(input_values.dims, [1, 1024, 128]); // [1, 4, 128] -> (padded to) -> [1, 1024, 128]
-
-                expect(avg(input_values.data)).toBeCloseTo(0.4647964835166931);
-                expect(input_values.data[0]).toBeCloseTo(-0.5662586092948914);
-                expect(input_values.data[1]).toBeCloseTo(-1.0300861597061157);
-                expect(input_values.data[129]).toBeCloseTo(-1.084834098815918);
-
-                // padded values
-                expect(input_values.data[1025]).toBeCloseTo(0.46703237295150757);
-                expect(input_values.data[2049]).toBeCloseTo(0.46703237295150757);
-                expect(input_values.data[10000]).toBeCloseTo(0.46703237295150757);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it('SeamlessM4TFeatureExtractor', async () => {
-            const audio = await audioPromise;
-            const processor = await AutoProcessor.from_pretrained('Xenova/wav2vec2-bert-CV16-en');
-            { // normal
-                console.log({ audio })
-                const { input_features, attention_mask } = await processor(audio);
-                compare(input_features.dims, [1, 649, 160]);
-                compare(attention_mask.dims, [1, 649]);
-
-                expect(avg(input_features.data)).toBeCloseTo(-2.938903875815413e-08);
-                expect(input_features.data[0]).toBeCloseTo(1.1939343214035034);
-                expect(input_features.data[1]).toBeCloseTo(0.7874255180358887);
-                expect(input_features.data[160]).toBeCloseTo(-0.712975025177002);
-                expect(input_features.data[161]).toBeCloseTo(0.045802414417266846);
-                expect(input_features.data.at(-1)).toBeCloseTo(-1.3328346014022827);
-
-                expect(sum(attention_mask.data)).toEqual(649);
-            }
-            { // padding (pad_to_multiple_of=2)
-                const { input_features, attention_mask } = await processor(audio.slice(0, 10000));
-
-                // [1, 61, 80] -> [1, 62, 80] -> [1, 31, 160]
-                compare(input_features.dims, [1, 31, 160]);
-                compare(attention_mask.dims, [1, 31]);
-
-                expect(avg(input_features.data)).toBeCloseTo(0.01612919569015503);
-                expect(input_features.data[0]).toBeCloseTo(0.9657132029533386);
-                expect(input_features.data[1]).toBeCloseTo(0.12912897765636444);
-                expect(input_features.data[160]).toBeCloseTo(-1.2364212274551392);
-                expect(input_features.data[161]).toBeCloseTo(-0.9703778028488159);
-                expect(input_features.data.at(-1)).toBeCloseTo(1); // padding value
-
-                expect(sum(attention_mask.data)).toEqual(30);
-            }
-        }, MAX_TEST_EXECUTION_TIME);
-
-        it('ClapFeatureExtractor', async () => {
-            const audio = await audioPromise;
-            const processor = await AutoProcessor.from_pretrained('Xenova/clap-htsat-unfused');
-            { // truncation
-                // Since truncation uses a random strategy, we override
-                // Math.random to ensure that the test is deterministic
-                const originalRandom = Math.random;
-                Math.random = () => 0.5;
-
-                let long_audio = new Float32Array(500000);
-                long_audio.set(audio);
-                long_audio.set(audio, long_audio.length - audio.length);
-
-                const { input_features } = await processor(long_audio);
-                compare(input_features.dims, [1, 1, 1001, 64]);
-
-                expect(avg(input_features.data)).toBeCloseTo(-37.94569396972656);
-                expect(input_features.data[0]).toBeCloseTo(-53.32647705078125);
-                expect(input_features.data[1]).toBeCloseTo(-47.76755142211914);
-                expect(input_features.data[65]).toBeCloseTo(-36.32261276245117);
-                expect(input_features.data[1002]).toBeCloseTo(-28.0314884185791);
-                expect(input_features.data[10000]).toBeCloseTo(-21.905902862548828);
-                expect(input_features.data[60000]).toBeCloseTo(-14.877863883972168);
-                expect(input_features.data[64062]).toBeCloseTo(-37.9784049987793);
-                expect(input_features.data[64063]).toBeCloseTo(-37.73963928222656);
-
-                // Reset Math.random
-                Math.random = originalRandom;
-            }
-            { // padding
-                const { input_features } = await processor(audio);
-                compare(input_features.dims, [1, 1, 1001, 64]);
-
-                expect(avg(input_features.data)).toBeCloseTo(-34.99049377441406);
-                expect(input_features.data[0]).toBeCloseTo(-21.32573890686035);
-                expect(input_features.data[1]).toBeCloseTo(-26.168411254882812);
-                expect(input_features.data[65]).toBeCloseTo(-29.716018676757812);
-                expect(input_features.data[1002]).toBeCloseTo(-32.16273498535156);
-                expect(input_features.data[10000]).toBeCloseTo(-19.9283390045166);
-
-                // padded values
-                expect(input_features.data[60000]).toBeCloseTo(-100.0);
-                expect(input_features.data[64062]).toBeCloseTo(-100.0);
-                expect(input_features.data[64063]).toBeCloseTo(-100.0);
-            }
-
-
-        }, MAX_TEST_EXECUTION_TIME);
-    });
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
 });
diff --git a/tests/requirements.txt b/tests/requirements.txt
deleted file mode 100644
index 309e747a4..000000000
--- a/tests/requirements.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-transformers[torch]@git+https://github.com/huggingface/transformers
-sacremoses==0.0.53
-sentencepiece==0.1.99
-protobuf==4.24.3
-rjieba==0.1.11
-jinja2==3.1.4
diff --git a/tests/tensor.test.js b/tests/tensor.test.js
deleted file mode 100644
index bc056b9c8..000000000
--- a/tests/tensor.test.js
+++ /dev/null
@@ -1,203 +0,0 @@
-
-import { Tensor } from '../src/transformers.js';
-import { compare } from './test_utils.js';
-import { cat, mean, stack, layer_norm } from '../src/utils/tensor.js';
-
-describe('Tensor operations', () => {
-
-    describe('cat', () => {
-
-        it('should concatenate on dim=0', async () => {
-            const t1 = new Tensor('float32', [1, 2, 3], [1, 3]);
-            const t2 = new Tensor('float32', [4, 5, 6, 7, 8, 9], [2, 3]);
-            const t3 = new Tensor('float32', [10, 11, 12], [1, 3]);
-
-            const target1 = new Tensor('float32', [1, 2, 3, 4, 5, 6, 7, 8, 9], [3, 3]);
-            const target2 = new Tensor('float32', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [4, 3]);
-
-            // 2 tensors
-            const concatenated1 = cat([t1, t2], 0);
-            compare(concatenated1, target1, 1e-3);
-
-            // 3 tensors
-            const concatenated2 = cat([t1, t2, t3], 0);
-            compare(concatenated2, target2, 1e-3);
-        });
-
-
-        it('should concatenate on dim=1', async () => {
-            const t1 = new Tensor('float32', [1, 2, 3, -1, -2, -3], [2, 3, 1]);
-            const t2 = new Tensor('float32', [4, -4], [2, 1, 1]);
-            const t3 = new Tensor('float32', [5, 6, -5, -6], [2, 2, 1]);
-
-            const target1 = new Tensor('float32', [1, 2, 3, 4, -1, -2, -3, -4], [2, 4, 1]);
-            const target2 = new Tensor('float32', [1, 2, 3, 4, 5, 6, -1, -2, -3, -4, -5, -6], [2, 6, 1]);
-
-            // 2 tensors
-            const concatenated1 = cat([t1, t2], 1);
-            compare(concatenated1, target1, 1e-3);
-
-            // 3 tensors
-            const concatenated2 = cat([t1, t2, t3], 1);
-            compare(concatenated2, target2, 1e-3);
-        });
-
-
-        it('should concatenate on dim=-2', async () => {
-
-            const t1 = new Tensor('float32', [1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16], [2, 1, 3, 2]);
-            const t2 = new Tensor('float32', [7, 8, 9, 10, 17, 18, 19, 20], [2, 1, 2, 2]);
-
-            const target = new Tensor('float32', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [2, 1, 5, 2]);
-
-            const concatenated = cat([t1, t2], -2);
-
-            compare(concatenated, target, 1e-3);
-
-        });
-
-        // TODO add tests for errors
-    });
-
-    describe('stack', () => {
-
-        const t1 = new Tensor('float32', [0, 1, 2, 3, 4, 5], [1, 3, 2]);
-
-        it('should stack on dim=0', async () => {
-            const target1 = new Tensor('float32', [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5], [2, 1, 3, 2]);
-            const target2 = new Tensor('float32', [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5], [3, 1, 3, 2]);
-
-            // 2 tensors
-            const stacked1 = stack([t1, t1], 0);
-            compare(stacked1, target1, 1e-3);
-
-            // 3 tensors
-            const stacked2 = stack([t1, t1, t1], 0);
-            compare(stacked2, target2, 1e-3);
-        });
-
-        it('should stack on dim=1', async () => {
-            const target1 = new Tensor('float32', [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5], [1, 2, 3, 2]);
-            const target2 = new Tensor('float32', [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5], [1, 3, 3, 2]);
-
-            // 2 tensors
-            const stacked1 = stack([t1, t1], 1);
-            compare(stacked1, target1, 1e-3);
-
-            // 3 tensors
-            const stacked2 = stack([t1, t1, t1], 1);
-            compare(stacked2, target2, 1e-3);
-        });
-
-        it('should stack on dim=-1', async () => {
-            const target1 = new Tensor('float32', [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5], [1, 3, 2, 2]);
-            const target2 = new Tensor('float32', [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5], [1, 3, 2, 3]);
-
-            // 2 tensors
-            const stacked1 = stack([t1, t1], -1);
-            compare(stacked1, target1, 1e-3);
-
-            // 3 tensors
-            const stacked2 = stack([t1, t1, t1], -1);
-            compare(stacked2, target2, 1e-3);
-        });
-    });
-
-    describe('permute', () => {
-        it('should permute', async () => {
-            const x = new Tensor(
-                'float32',
-                [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23],
-                [2, 3, 4],
-            );
-            // Permute axes to (0, 1, 2) - No change
-            const permuted_1 = x.permute(0, 1, 2);
-            const target_1 = x;
-            compare(permuted_1, target_1, 1e-3);
-
-            // Permute axes to (0, 2, 1)
-            const permuted_2 = x.permute(0, 2, 1);
-            const target_2 = new Tensor(
-                'float32',
-                [0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11, 12, 16, 20, 13, 17, 21, 14, 18, 22, 15, 19, 23],
-                [2, 4, 3],
-            );
-            compare(permuted_2, target_2, 1e-3);
-
-            // Permute axes to (1, 0, 2)
-            const permuted_3 = x.permute(1, 0, 2);
-            const target_3 = new Tensor(
-                'float32',
-                [0, 1, 2, 3, 12, 13, 14, 15, 4, 5, 6, 7, 16, 17, 18, 19, 8, 9, 10, 11, 20, 21, 22, 23],
-                [3, 2, 4],
-            );
-            compare(permuted_3, target_3, 1e-3);
-
-            // Permute axes to (1, 2, 0)
-            const permuted_4 = x.permute(1, 2, 0);
-            const target_4 = new Tensor(
-                'float32',
-                [0, 12, 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23],
-                [3, 4, 2],
-            );
-            compare(permuted_4, target_4, 1e-3);
-
-            // Permute axes to (2, 0, 1)
-            const permuted_5 = x.permute(2, 0, 1);
-            const target_5 = new Tensor(
-                'float32',
-                [0, 4, 8, 12, 16, 20, 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23],
-                [4, 2, 3],
-            );
-            compare(permuted_5, target_5, 1e-3);
-
-            // Permute axes to (2, 1, 0)
-            const permuted_6 = x.permute(2, 1, 0);
-            const target_6 = new Tensor(
-                'float32',
-                [0, 12, 4, 16, 8, 20, 1, 13, 5, 17, 9, 21, 2, 14, 6, 18, 10, 22, 3, 15, 7, 19, 11, 23],
-                [4, 3, 2],
-            );
-            compare(permuted_6, target_6, 1e-3);
-        });
-    });
-
-    describe('mean', () => {
-        it('should calculate mean', async () => {
-            const t1 = new Tensor('float32', [1, 2, 3, 4, 5, 6], [2, 3, 1]);
-
-            const target = new Tensor('float32', [3.5], []);
-
-            const target0 = new Tensor('float32', [2.5, 3.5, 4.5], [3, 1]);
-            const target1 = new Tensor('float32', [2, 5], [2, 1]);
-            const target2 = new Tensor('float32', [1, 2, 3, 4, 5, 6], [2, 3]);
-
-            let avg = mean(t1);
-            compare(avg, target, 1e-3);
-
-            let avg0 = mean(t1, 0);
-            compare(avg0, target0, 1e-3);
-
-            let avg1 = mean(t1, 1);
-            compare(avg1, target1, 1e-3);
-
-            let avg2 = mean(t1, 2);
-            compare(avg2, target2, 1e-3);
-
-        })
-    });
-
-    describe('layer_norm', () => {
-        it('should calculate layer norm', async () => {
-            const t1 = new Tensor('float32', [1, 2, 3, 4, 5, 6], [2, 3]);
-
-            const target = new Tensor('float32', [
-                -1.2247356176376343, 0.0, 1.2247356176376343,
-                -1.2247357368469238, -1.1920928955078125e-07, 1.2247354984283447,
-            ], [2, 3]);
-
-            const norm = layer_norm(t1, [t1.dims.at(-1)]);
-            compare(norm, target, 1e-3);
-        });
-    });
-});
diff --git a/tests/test_utils.js b/tests/test_utils.js
index 2a05c657f..9928bf75b 100644
--- a/tests/test_utils.js
+++ b/tests/test_utils.js
@@ -1,32 +1,30 @@
-
-
 export async function loadAudio(url) {
-    // NOTE: Since the Web Audio API is not available in Node.js, we will need to use the `wavefile` library to obtain the raw audio data.
-    // For more information, see: https://huggingface.co/docs/transformers.js/guides/node-audio-processing
-    let wavefile = (await import('wavefile')).default;
-
-    // Load audio data
-    let buffer = Buffer.from(await fetch(url).then(x => x.arrayBuffer()))
+  // NOTE: Since the Web Audio API is not available in Node.js, we will need to use the `wavefile` library to obtain the raw audio data.
+  // For more information, see: https://huggingface.co/docs/transformers.js/guides/node-audio-processing
+  let wavefile = (await import("wavefile")).default;
 
-    // Read .wav file and convert it to required format
-    let wav = new wavefile.WaveFile(buffer);
-    wav.toBitDepth('32f'); // Pipeline expects input as a Float32Array
-    wav.toSampleRate(16000); // Whisper expects audio with a sampling rate of 16000
-    let audioData = wav.getSamples();
-    if (Array.isArray(audioData)) {
-        if (audioData.length > 1) {
-            const SCALING_FACTOR = Math.sqrt(2);
+  // Load audio data
+  let buffer = Buffer.from(await fetch(url).then((x) => x.arrayBuffer()));
 
-            // Merge channels (into first channel to save memory)
-            for (let i = 0; i < audioData[0].length; ++i) {
-                audioData[0][i] = SCALING_FACTOR * (audioData[0][i] + audioData[1][i]) / 2;
-            }
-        }
+  // Read .wav file and convert it to required format
+  let wav = new wavefile.WaveFile(buffer);
+  wav.toBitDepth("32f"); // Pipeline expects input as a Float32Array
+  wav.toSampleRate(16000); // Whisper expects audio with a sampling rate of 16000
+  let audioData = wav.getSamples();
+  if (Array.isArray(audioData)) {
+    if (audioData.length > 1) {
+      const SCALING_FACTOR = Math.sqrt(2);
 
-        // Select first channel
-        audioData = audioData[0];
+      // Merge channels (into first channel to save memory)
+      for (let i = 0; i < audioData[0].length; ++i) {
+        audioData[0][i] = (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
+      }
     }
-    return audioData;
+
+    // Select first channel
+    audioData = audioData[0];
+  }
+  return audioData;
 }
 /**
  * Deep equality test (for arrays and objects) with tolerance for floating point numbers
@@ -35,38 +33,33 @@ export async function loadAudio(url) {
  * @param {number} tol Tolerance for floating point numbers
  */
 export function compare(val1, val2, tol = 0.1) {
-    if (
-        (val1 !== null && val2 !== null) &&
-        (typeof val1 === 'object' && typeof val2 === 'object')
-    ) {
-        // Both are non-null objects
-
-        if (Array.isArray(val1) && Array.isArray(val2)) {
-            expect(val1).toHaveLength(val2.length);
+  if (val1 !== null && val2 !== null && typeof val1 === "object" && typeof val2 === "object") {
+    // Both are non-null objects
 
-            for (let i = 0; i < val1.length; ++i) {
-                compare(val1[i], val2[i], tol);
-            }
+    if (Array.isArray(val1) && Array.isArray(val2)) {
+      expect(val1).toHaveLength(val2.length);
 
-        } else {
-            expect(Object.keys(val1)).toHaveLength(Object.keys(val2).length);
+      for (let i = 0; i < val1.length; ++i) {
+        compare(val1[i], val2[i], tol);
+      }
+    } else {
+      expect(Object.keys(val1)).toHaveLength(Object.keys(val2).length);
 
-            for (let key in val1) {
-                compare(val1[key], val2[key], tol);
-            }
-        }
+      for (let key in val1) {
+        compare(val1[key], val2[key], tol);
+      }
+    }
+  } else {
+    // At least one of them is not an object
+    // First check that both have the same type
+    expect(typeof val1).toEqual(typeof val2);
 
+    if (typeof val1 === "number" && (!Number.isInteger(val1) || !Number.isInteger(val2))) {
+      // If both are numbers and at least one of them is not an integer
+      expect(val1).toBeCloseTo(val2, -Math.log10(tol));
     } else {
-        // At least one of them is not an object
-        // First check that both have the same type
-        expect(typeof val1).toEqual(typeof val2);
-
-        if (typeof val1 === 'number' && (!Number.isInteger(val1) || !Number.isInteger(val2))) {
-            // If both are numbers and at least one of them is not an integer
-            expect(val1).toBeCloseTo(val2, -Math.log10(tol));
-        } else {
-            // Perform equality test
-            expect(val1).toEqual(val2);
-        }
+      // Perform equality test
+      expect(val1).toEqual(val2);
     }
-}
\ No newline at end of file
+  }
+}
diff --git a/tests/tiny_random.test.js b/tests/tiny_random.test.js
new file mode 100644
index 000000000..5d97d53a1
--- /dev/null
+++ b/tests/tiny_random.test.js
@@ -0,0 +1,2550 @@
+import {
+  // Tokenizers
+  CodeGenTokenizer,
+  LlamaTokenizer,
+  CohereTokenizer,
+  GemmaTokenizer,
+  GPT2Tokenizer,
+  GPTNeoXTokenizer,
+  BloomTokenizer,
+  BertTokenizer,
+  T5Tokenizer,
+  WhisperTokenizer,
+  BartTokenizer,
+  MarianTokenizer,
+  PreTrainedTokenizer,
+  AutoTokenizer,
+
+  // Processors
+  CLIPImageProcessor,
+  AutoProcessor,
+  Processor,
+  Florence2Processor,
+
+  // Models
+  LlamaForCausalLM,
+  GraniteForCausalLM,
+  CohereModel,
+  CohereForCausalLM,
+  GemmaForCausalLM,
+  Gemma2ForCausalLM,
+  OPTForCausalLM,
+  GPTNeoXForCausalLM,
+  GPTJForCausalLM,
+  BloomForCausalLM,
+  GPTBigCodeForCausalLM,
+  GPT2LMHeadModel,
+  JAISLMHeadModel,
+  MptForCausalLM,
+  CodeGenForCausalLM,
+  MistralForCausalLM,
+  GPTNeoForCausalLM,
+  BertForMaskedLM,
+  BertForSequenceClassification,
+  T5ForConditionalGeneration,
+  T5Model,
+  BertModel,
+  BertForTokenClassification,
+  BertForQuestionAnswering,
+  MusicgenForConditionalGeneration,
+  LlavaForConditionalGeneration,
+  WhisperForConditionalGeneration,
+  VisionEncoderDecoderModel,
+  Florence2ForConditionalGeneration,
+  MarianMTModel,
+
+  // Pipelines
+  pipeline,
+  FillMaskPipeline,
+  TextClassificationPipeline,
+  TextGenerationPipeline,
+  ImageClassificationPipeline,
+  ZeroShotImageClassificationPipeline,
+  TokenClassificationPipeline,
+  QuestionAnsweringPipeline,
+
+  // Other
+  full,
+  RawImage,
+} from "../src/transformers.js";
+
+import { init, MAX_TEST_TIME, MAX_MODEL_LOAD_TIME, MAX_TEST_EXECUTION_TIME, MAX_MODEL_DISPOSE_TIME } from "./init.js";
+import { compare } from "./test_utils.js";
+
+init();
+
+const DEFAULT_MODEL_OPTIONS = {
+  dtype: "fp32",
+};
+describe("Tiny random models", () => {
+  describe("bert", () => {
+    describe("BertModel", () => {
+      const model_id = "hf-internal-testing/tiny-random-BertModel";
+
+      /** @type {BertModel} */
+      let model;
+      /** @type {BertTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await BertModel.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await BertTokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const { last_hidden_state } = await model(inputs);
+          expect(last_hidden_state.dims).toEqual([1, 7, 32]);
+          expect(last_hidden_state.mean().item()).toBeCloseTo(0.0, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const { last_hidden_state } = await model(inputs);
+          expect(last_hidden_state.dims).toEqual([2, 12, 32]);
+          expect(last_hidden_state.mean().item()).toBeCloseTo(1.4901161193847656e-8, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+
+    describe("BertForMaskedLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-BertForMaskedLM";
+
+      const texts = ["The goal of life is [MASK].", "Paris is the [MASK] of France."];
+
+      /** @type {BertForMaskedLM} */
+      let model;
+      /** @type {BertTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await BertForMaskedLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await BertTokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer(texts[0]);
+          const { logits } = await model(inputs);
+          expect(logits.dims).toEqual([1, 19, 1124]);
+          expect(logits.mean().item()).toBeCloseTo(0.0016587056452408433, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(texts, { padding: true });
+          const { logits } = await model(inputs);
+          expect(logits.dims).toEqual([2, 22, 1124]);
+          expect(logits.mean().item()).toBeCloseTo(0.0017160633578896523, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+
+    describe("BertForSequenceClassification", () => {
+      const model_id = "hf-internal-testing/tiny-random-BertForSequenceClassification";
+
+      /** @type {BertForSequenceClassification} */
+      let model;
+      /** @type {BertTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await BertForSequenceClassification.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await BertTokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const { logits } = await model(inputs);
+          const target = [[0.00043986947275698185, -0.030218850821256638]].flat();
+          expect(logits.dims).toEqual([1, 2]);
+          logits
+            .tolist()
+            .flat()
+            .forEach((item, i) => {
+              expect(item).toBeCloseTo(target[i], 5);
+            });
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const { logits } = await model(inputs);
+          const target = [
+            [0.00043986947275698185, -0.030218850821256638],
+            [0.0003853091038763523, -0.03022204339504242],
+          ].flat();
+          expect(logits.dims).toEqual([2, 2]);
+          logits
+            .tolist()
+            .flat()
+            .forEach((item, i) => {
+              expect(item).toBeCloseTo(target[i], 5);
+            });
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+
+    describe("BertForTokenClassification", () => {
+      const model_id = "hf-internal-testing/tiny-random-BertForTokenClassification";
+
+      /** @type {BertForTokenClassification} */
+      let model;
+      /** @type {BertTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await BertForTokenClassification.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await BertTokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const { logits } = await model(inputs);
+          expect(logits.dims).toEqual([1, 7, 2]);
+          expect(logits.mean().item()).toBeCloseTo(0.07089076191186905, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const { logits } = await model(inputs);
+          expect(logits.dims).toEqual([2, 12, 2]);
+          expect(logits.mean().item()).toBeCloseTo(0.04702216014266014, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+
+    describe("BertForQuestionAnswering", () => {
+      const model_id = "hf-internal-testing/tiny-random-BertForQuestionAnswering";
+
+      /** @type {BertForQuestionAnswering} */
+      let model;
+      /** @type {BertTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await BertForQuestionAnswering.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await BertTokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const { start_logits, end_logits } = await model(inputs);
+          expect(start_logits.dims).toEqual([1, 7]);
+          expect(start_logits.mean().item()).toBeCloseTo(0.12772157788276672, 5);
+          expect(end_logits.dims).toEqual([1, 7]);
+          expect(end_logits.mean().item()).toBeCloseTo(0.11811424791812897, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const { start_logits, end_logits } = await model(inputs);
+          expect(start_logits.dims).toEqual([2, 12]);
+          expect(start_logits.mean().item()).toBeCloseTo(0.12843115627765656, 5);
+          expect(end_logits.dims).toEqual([2, 12]);
+          expect(end_logits.mean().item()).toBeCloseTo(0.11745202541351318, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("t5", () => {
+    describe("T5Model", () => {
+      const model_id = "hf-internal-testing/tiny-random-T5Model";
+
+      /** @type {T5Model} */
+      let model;
+      /** @type {T5Tokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await T5Model.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await T5Tokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it("forward", async () => {
+        // Example adapted from https://huggingface.co/google-t5/t5-small#how-to-get-started-with-the-model
+        const inputs = tokenizer("Studies have been shown that owning a dog is good for you");
+        const { input_ids: decoder_input_ids } = tokenizer("Studies show that");
+
+        const { last_hidden_state } = await model({ ...inputs, decoder_input_ids });
+        expect(last_hidden_state.dims).toEqual([1, 4, 32]);
+        expect(last_hidden_state.mean().item()).toBeCloseTo(7.492632721550763e-5, 8);
+      });
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+    describe("T5ForConditionalGeneration", () => {
+      const model_id = "hf-internal-testing/tiny-random-T5ForConditionalGeneration";
+
+      /** @type {T5ForConditionalGeneration} */
+      let model;
+      /** @type {T5Tokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await T5ForConditionalGeneration.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await T5Tokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it("forward", async () => {
+        // Example adapted from https://huggingface.co/google-t5/t5-small#how-to-get-started-with-the-model
+        const inputs = tokenizer("Studies have been shown that owning a dog is good for you");
+        const { input_ids: decoder_input_ids } = tokenizer("Studies show that");
+
+        const model = await T5ForConditionalGeneration.from_pretrained(model_id, DEFAULT_MODEL_OPTIONS);
+        const outputs = await model({ ...inputs, decoder_input_ids });
+        expect(outputs.logits.dims).toEqual([1, 4, 32100]);
+        expect(outputs.logits.mean().item()).toBeCloseTo(8.867568901393952e-9, 12);
+      });
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n],
+            [0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("marian", () => {
+    describe("MarianMTModel", () => {
+      const model_id = "onnx-community/tiny-random-MarianMTModel";
+
+      /** @type {MarianMTModel} */
+      let model;
+      /** @type {MarianTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await MarianMTModel.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await MarianTokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[3n, 40672n, 8358n, 32810n, 32810n, 32810n, 32810n, 35687n, 33073n, 6870n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [3n, 40672n, 8358n, 32810n, 32810n, 32810n, 32810n, 35687n, 33073n, 6870n],
+            [3n, 40672n, 8358n, 32810n, 32810n, 32810n, 32810n, 35687n, 33073n, 6870n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("musicgen", () => {
+    describe("MusicgenForConditionalGeneration", () => {
+      const model_id = "hf-internal-testing/tiny-random-MusicgenForConditionalGeneration";
+
+      // Example adapted from https://huggingface.co/docs/transformers/model_doc/musicgen#text-conditional-generation
+      const texts = ["80s pop track with bassy drums and synth", "90s rock song with loud guitars and heavy drums"];
+
+      /** @type {MusicgenForConditionalGeneration} */
+      let model;
+      /** @type {T5Tokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await MusicgenForConditionalGeneration.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await T5Tokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it("forward", async () => {
+        // Example from https://huggingface.co/docs/transformers/model_doc/musicgen#transformers.MusicgenForConditionalGeneration.forward.example
+        const inputs = tokenizer(texts, { padding: true });
+        const pad_token_id = BigInt(model.generation_config.pad_token_id);
+        const decoder_input_ids = full([inputs.input_ids.dims[0] * model.config.decoder.num_codebooks, 1], pad_token_id);
+        const { logits } = await model({ ...inputs, decoder_input_ids });
+        expect(logits.dims).toEqual([8, 1, 99]);
+        expect(logits.mean().item()).toBeCloseTo(-0.0018370470497757196, 5);
+      });
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer(texts[0]);
+          const audio_values = await model.generate({ ...inputs, max_length: 10 });
+          expect(audio_values.dims).toEqual([1, 1, 1920]);
+          expect(audio_values.mean().item()).toBeCloseTo(0.16644205152988434, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(texts, { padding: true });
+          const audio_values = await model.generate({ ...inputs, max_length: 10 });
+          expect(audio_values.dims).toEqual([2, 1, 1920]);
+          expect(audio_values.mean().item()).toBeCloseTo(0.16644206643104553, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("whisper", () => {
+    describe("WhisperForConditionalGeneration", () => {
+      const model_id = "Xenova/tiny-random-WhisperForConditionalGeneration";
+
+      /** @type {WhisperForConditionalGeneration} */
+      let model;
+      /** @type {WhisperTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await WhisperForConditionalGeneration.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await WhisperTokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      describe("prefix tokens", () => {
+        const input_features = full([1, 80, 3000], 0.0);
+
+        describe("English-only", () => {
+          it("default", async () => {
+            const outputs = await model.generate({
+              input_features,
+              is_multilingual: false,
+              max_new_tokens: 1,
+            });
+
+            expect(outputs.tolist()).toEqual([[/* Prefix */ 50258n, 50363n, /* Generated */ 45084n]]);
+          });
+          it("return_timestamps=true", async () => {
+            const outputs = await model.generate({
+              input_features,
+              is_multilingual: false,
+              max_new_tokens: 1,
+              return_timestamps: true,
+            });
+
+            expect(outputs.tolist()).toEqual([[/* Prefix */ 50258n, /* Generated */ 50366n]]);
+          });
+        });
+
+        describe("multilingual", () => {
+          it("language unset; task unset", async () => {
+            // language defaults to 'en'
+            // task defaults to 'transcribe'
+
+            const outputs = await model.generate({
+              input_features,
+              max_new_tokens: 1,
+            });
+
+            expect(outputs.tolist()).toEqual([[/* Prefix */ 50258n, 50259n, 50359n, 50363n, /* Generated */ 45084n]]);
+          });
+
+          it("language set; task unset", async () => {
+            // task defaults to 'transcribe'
+            const outputs = await model.generate({
+              input_features,
+              max_new_tokens: 1,
+              language: "af",
+            });
+
+            expect(outputs.tolist()).toEqual([[/* Prefix */ 50258n, 50327n, 50359n, 50363n, /* Generated */ 45084n]]);
+          });
+
+          it("language set; task set", async () => {
+            const outputs = await model.generate({
+              input_features,
+              max_new_tokens: 1,
+              language: "zh",
+              task: "translate",
+            });
+
+            expect(outputs.tolist()).toEqual([[/* Prefix */ 50258n, 50260n, 50358n, 50363n, /* Generated */ 45084n]]);
+          });
+
+          it("return_timestamps=true", async () => {
+            const outputs = await model.generate({
+              input_features,
+              max_new_tokens: 1,
+              language: "en",
+              task: "transcribe",
+              return_timestamps: true,
+            });
+
+            expect(outputs.tolist()).toEqual([[/* Prefix */ 50258n, 50259n, 50359n, /* Generated */ 50400n]]);
+          });
+        });
+      });
+
+      describe("decoder_start_ids", () => {
+        const input_features = full([1, 80, 3000], 0.0);
+
+        it("broadcast inputs", async () => {
+          const { decoder_start_token_id, lang_to_id, task_to_id, no_timestamps_token_id } = model.generation_config;
+
+          const outputs = await model.generate({
+            input_features, // batch size 1
+            max_new_tokens: 1,
+            decoder_input_ids: [
+              // batch size 2
+              // <|startoftranscript|> <|lang_id|> <|task|> [<|notimestamps|>]
+              [decoder_start_token_id, lang_to_id["<|en|>"], task_to_id["translate"], no_timestamps_token_id],
+              [decoder_start_token_id, lang_to_id["<|fr|>"], task_to_id["transcribe"], no_timestamps_token_id],
+            ],
+          });
+          expect(outputs.tolist()).toEqual([
+            [/* Prefix */ 50258n, 50259n, 50358n, 50363n, /* Generated */ 45084n],
+            [/* Prefix */ 50258n, 50265n, 50359n, 50363n, /* Generated */ 45084n],
+          ]);
+        });
+      });
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("llava", () => {
+    const prompts = [
+      // Example adapted from https://huggingface.co/docs/transformers/model_doc/llava#transformers.LlavaForConditionalGeneration.forward.example
+      "<image>\nUSER: What's the content of the image?\nASSISTANT:",
+      "<image>Hi",
+    ];
+
+    // Empty white image
+    const dims = [224, 224, 3];
+    const image = new RawImage(new Uint8ClampedArray(dims[0] * dims[1] * dims[2]).fill(255), ...dims);
+
+    describe("LlavaForConditionalGeneration", () => {
+      const model_id = "Xenova/tiny-random-LlavaForConditionalGeneration";
+
+      /** @type {LlavaForConditionalGeneration} */
+      let model;
+      /** @type {LlamaTokenizer} */
+      let tokenizer;
+      /** @type {CLIPImageProcessor} */
+      let processor;
+      beforeAll(async () => {
+        model = await LlavaForConditionalGeneration.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await LlamaTokenizer.from_pretrained(model_id);
+        processor = await AutoProcessor.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it("forward", async () => {
+        const text_inputs = tokenizer(prompts[0]);
+        const vision_inputs = await processor(image);
+        const inputs = { ...text_inputs, ...vision_inputs };
+
+        const { logits } = await model(inputs);
+        expect(logits.dims).toEqual([1, 244, 32002]);
+        expect(logits.mean().item()).toBeCloseTo(-0.0005755752790719271, 8);
+      });
+
+      it(
+        "batch_size=1",
+        async () => {
+          const text_inputs = tokenizer(prompts[0]);
+          const vision_inputs = await processor(image);
+          const inputs = { ...text_inputs, ...vision_inputs };
+
+          const generate_ids = await model.generate({ ...inputs, max_new_tokens: 10 });
+          expect(generate_ids.tolist()).toEqual([[1n, 32000n, 29871n, 13n, 11889n, 29901n, 1724n, 29915n, 29879n, 278n, 2793n, 310n, 278n, 1967n, 29973n, 13n, 22933n, 9047n, 13566n, 29901n, 21557n, 16781n, 27238n, 8279n, 20454n, 11927n, 12462n, 12306n, 2414n, 7561n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const text_inputs = tokenizer(prompts, { padding: true });
+          const vision_inputs = await processor([image, image]);
+          const inputs = { ...text_inputs, ...vision_inputs };
+
+          const generate_ids = await model.generate({ ...inputs, max_new_tokens: 10 });
+          expect(generate_ids.tolist()).toEqual([
+            [1n, 32000n, 29871n, 13n, 11889n, 29901n, 1724n, 29915n, 29879n, 278n, 2793n, 310n, 278n, 1967n, 29973n, 13n, 22933n, 9047n, 13566n, 29901n, 21557n, 16781n, 27238n, 8279n, 20454n, 11927n, 12462n, 12306n, 2414n, 7561n],
+            [0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 0n, 1n, 32000n, 6324n, 1217n, 22958n, 22913n, 10381n, 148n, 31410n, 31736n, 7358n, 9150n, 28635n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("florence2", () => {
+    const texts = ["Describe with a paragraph what is shown in the image.", "Locate the objects with category name in the image."];
+
+    // Empty white image
+    const dims = [224, 224, 3];
+    const image = new RawImage(new Uint8ClampedArray(dims[0] * dims[1] * dims[2]).fill(255), ...dims);
+
+    describe("Florence2ForConditionalGeneration", () => {
+      const model_id = "Xenova/tiny-random-Florence2ForConditionalGeneration";
+
+      /** @type {Florence2ForConditionalGeneration} */
+      let model;
+      /** @type {BartTokenizer} */
+      let tokenizer;
+      /** @type {Florence2Processor} */
+      let processor;
+      beforeAll(async () => {
+        model = await Florence2ForConditionalGeneration.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await BartTokenizer.from_pretrained(model_id);
+        processor = await AutoProcessor.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it("forward", async () => {
+        const text_inputs = tokenizer(texts[0]);
+        const vision_inputs = await processor(image);
+        const inputs = {
+          ...text_inputs,
+          ...vision_inputs,
+          decoder_input_ids: full([1, 1], 2n),
+        };
+
+        const { logits } = await model(inputs);
+        expect(logits.dims).toEqual([1, 1, 51289]);
+      });
+
+      it(
+        "batch_size=1",
+        async () => {
+          const text_inputs = tokenizer(texts[0]);
+          {
+            const generate_ids = await model.generate({ ...text_inputs, max_new_tokens: 10 });
+            expect(generate_ids.tolist()).toEqual([[2n, 0n, 0n, 0n, 1n, 0n, 0n, 2n]]);
+          }
+          {
+            const vision_inputs = await processor(image);
+            const inputs = { ...text_inputs, ...vision_inputs };
+
+            const generate_ids = await model.generate({ ...inputs, max_new_tokens: 10 });
+            expect(generate_ids.tolist()).toEqual([[2n, 0n, 48n, 48n, 48n, 48n, 48n, 48n, 48n, 48n, 2n]]);
+          }
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const text_inputs = tokenizer(texts, { padding: true });
+          {
+            const generate_ids = await model.generate({ ...text_inputs, max_new_tokens: 10 });
+            expect(generate_ids.tolist()).toEqual([
+              [2n, 0n, 0n, 0n, 1n, 0n, 0n, 2n],
+              [2n, 0n, 0n, 0n, 1n, 0n, 0n, 2n],
+            ]);
+          }
+          {
+            const vision_inputs = await processor([image, image]);
+            const inputs = { ...text_inputs, ...vision_inputs };
+
+            const generate_ids = await model.generate({ ...inputs, max_new_tokens: 10 });
+            expect(generate_ids.tolist()).toEqual([
+              [2n, 0n, 48n, 48n, 48n, 48n, 48n, 48n, 48n, 48n, 2n],
+              [2n, 0n, 48n, 48n, 48n, 48n, 48n, 48n, 48n, 48n, 2n],
+            ]);
+          }
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("vision-encoder-decoder", () => {
+    describe("VisionEncoderDecoderModel", () => {
+      const model_id = "hf-internal-testing/tiny-random-VisionEncoderDecoderModel-vit-gpt2";
+
+      /** @type {VisionEncoderDecoderModel} */
+      let model;
+      /** @type {GPT2Tokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await VisionEncoderDecoderModel.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await GPT2Tokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const outputs = await model.generate({
+            pixel_values: full([1, 3, 30, 30], -1.0),
+            max_length: 5,
+          });
+          expect(outputs.tolist()).toEqual([[0n, 400n, 400n, 400n, 400n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      // TODO: Add back
+      // it('batch_size>1', async () => {
+      //     const outputs = await model.generate({
+      //         pixel_values: cat([
+      //             full([1, 3, 30, 30], -1.0),
+      //             full([1, 3, 30, 30], 0.0),
+      //         ]),
+      //         max_length: 5,
+      //     });
+      //     expect(outputs.tolist()).toEqual([
+      //         // Generation continues
+      //         [0n, 400n, 400n, 400n, 400n],
+
+      //         // Finishes early. 1023 is the padding token
+      //         [0n, 0n, 1023n, 1023n, 1023n],
+      //     ]);
+      // }, MAX_TEST_EXECUTION_TIME);
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+  describe("opt", () => {
+    describe("OPTForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-OPTForCausalLM";
+      /** @type {OPTForCausalLM} */
+      let model;
+      /** @type {GPT2Tokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await OPTForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          revision: "refs/pr/2",
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await GPT2Tokenizer.from_pretrained(model_id, {
+          // TODO update this
+          revision: "refs/pr/3",
+        });
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[2n, 42891n, 39144n, 39144n, 39144n, 39144n, 39144n, 39144n, 39144n, 39144n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [1n, 2n, 42891n, 39144n, 39144n, 39144n, 39144n, 39144n, 39144n, 39144n],
+            [2n, 42891n, 232n, 24680n, 24680n, 24680n, 24680n, 24680n, 24680n, 24680n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("llama", () => {
+    describe("LlamaForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM";
+      /** @type {LlamaForCausalLM} */
+      let model;
+      /** @type {LlamaTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await LlamaForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await LlamaTokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[1n, 22172n, 18547n, 8143n, 22202n, 9456n, 17213n, 15330n, 26591n, 15721n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 1n, 22172n, 18547n, 8143n, 22202n, 9456n, 17213n, 15330n, 26591n],
+            [1n, 22172n, 3186n, 24786n, 19169n, 20222n, 29993n, 27146n, 27426n, 24562n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+
+    describe("LlamaForCausalLM (onnxruntime-genai)", () => {
+      const model_id = "onnx-community/tiny-random-LlamaForCausalLM-ONNX";
+      /** @type {LlamaTokenizer} */
+      let tokenizer;
+      let inputs;
+      beforeAll(async () => {
+        tokenizer = await LlamaTokenizer.from_pretrained(model_id);
+        inputs = tokenizer("hello");
+      }, MAX_MODEL_LOAD_TIME);
+
+      const dtypes = ["fp32", "fp16", "q4", "q4f16"];
+
+      for (const dtype of dtypes) {
+        it(
+          `dtype=${dtype}`,
+          async () => {
+            /** @type {LlamaForCausalLM} */
+            const model = await LlamaForCausalLM.from_pretrained(model_id, {
+              // TODO move to config
+              ...DEFAULT_MODEL_OPTIONS,
+              dtype,
+            });
+
+            const outputs = await model.generate({
+              ...inputs,
+              max_length: 5,
+            });
+            expect(outputs.tolist()).toEqual([[128000n, 15339n, 15339n, 15339n, 15339n]]);
+
+            await model?.dispose();
+          },
+          MAX_TEST_TIME,
+        );
+      }
+    });
+  });
+
+  describe("granite", () => {
+    describe("GraniteForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-GraniteForCausalLM";
+      /** @type {GraniteForCausalLM} */
+      let model;
+      /** @type {GPT2Tokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await GraniteForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await GPT2Tokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[7656n, 23147n, 31291n, 1011n, 8768n, 30904n, 9256n, 28368n, 16199n, 26560n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 7656n, 23147n, 31291n, 1011n, 8768n, 30904n, 9256n, 28368n, 16199n],
+            [7656n, 5788n, 9477n, 14490n, 18374n, 28650n, 10907n, 2989n, 14096n, 27403n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("cohere", () => {
+    describe("CohereModel", () => {
+      const model_id = "hf-internal-testing/tiny-random-CohereModel";
+      /** @type {CohereModel} */
+      let model;
+      /** @type {CohereTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await CohereModel.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await CohereTokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const { last_hidden_state } = await model(inputs);
+          expect(last_hidden_state.dims).toEqual([1, 4, 32]);
+          expect(last_hidden_state.mean().item()).toBeCloseTo(0.0, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const { last_hidden_state } = await model(inputs);
+          expect(last_hidden_state.dims).toEqual([2, 6, 32]);
+          expect(last_hidden_state.mean().item()).toBeCloseTo(9.934107758624577e-9, 5);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+
+    describe("CohereForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-CohereForCausalLM";
+      /** @type {CohereForCausalLM} */
+      let model;
+      /** @type {CohereTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await CohereForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await CohereTokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[5n, 203n, 790n, 87n, 87n, 87n, 87n, 87n, 87n, 87n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 0n, 5n, 203n, 790n, 87n, 87n, 87n, 87n, 87n],
+            [5n, 203n, 790n, 87n, 214n, 741n, 741n, 741n, 741n, 741n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("gemma", () => {
+    describe("GemmaForCausalLM", () => {
+      const model_id = "Xenova/tiny-random-GemmaForCausalLM";
+      /** @type {GemmaForCausalLM} */
+      let model;
+      /** @type {GemmaTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await GemmaForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await GemmaTokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[2n, 17534n, 254059n, 254059n, 254059n, 254059n, 254059n, 254059n, 254059n, 254059n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 2n, 17534n, 254059n, 254059n, 254059n, 254059n, 254059n, 254059n, 254059n],
+            [2n, 17534n, 2134n, 71055n, 71055n, 71055n, 71055n, 71055n, 71055n, 71055n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("gemma", () => {
+    describe("Gemma2ForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-Gemma2ForCausalLM";
+      /** @type {Gemma2ForCausalLM} */
+      let model;
+      /** @type {GemmaTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await Gemma2ForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await GemmaTokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[2n, 17534n, 127534n, 160055n, 160055n, 160055n, 160055n, 160055n, 160055n, 160055n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 2n, 17534n, 127534n, 127534n, 215341n, 215341n, 215341n, 215341n, 215341n],
+            [2n, 17534n, 2134n, 107508n, 160055n, 160055n, 160055n, 160055n, 160055n, 160055n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("gpt_neo", () => {
+    describe("GPTNeoForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-GPTNeoForCausalLM";
+      /** @type {GPTNeoForCausalLM} */
+      let model;
+      /** @type {GPT2Tokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await GPTNeoForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await GPT2Tokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[258n, 863n, 79n, 79n, 79n, 949n, 949n, 949n, 949n, 949n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 0n, 258n, 863n, 79n, 79n, 79n, 949n, 949n, 949n],
+            [258n, 863n, 79n, 269n, 813n, 849n, 849n, 849n, 849n, 849n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("gpt_neox", () => {
+    describe("GPTNeoXForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-GPTNeoXForCausalLM";
+      /** @type {GPTNeoXForCausalLM} */
+      let model;
+      /** @type {GPTNeoXTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await GPTNeoXForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await GPTNeoXTokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[259n, 864n, 80n, 881n, 502n, 895n, 938n, 668n, 502n, 895n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 0n, 259n, 864n, 80n, 881n, 502n, 895n, 938n, 668n],
+            [259n, 864n, 80n, 270n, 814n, 522n, 112n, 268n, 503n, 468n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("gptj", () => {
+    describe("GPTJForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-GPTJForCausalLM";
+      /** @type {GPTJForCausalLM} */
+      let model;
+      /** @type {GPTNeoXTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await GPTJForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await GPTNeoXTokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[258n, 863n, 79n, 102n, 401n, 773n, 889n, 159n, 957n, 869n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 0n, 258n, 863n, 79n, 102n, 401n, 773n, 889n, 159n],
+            [258n, 863n, 79n, 269n, 813n, 879n, 175n, 39n, 141n, 1000n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("bloom", () => {
+    describe("BloomForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-BloomForCausalLM";
+      /** @type {BloomForCausalLM} */
+      let model;
+      /** @type {BloomTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await BloomForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await BloomTokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[198n, 803n, 82n, 82n, 82n, 82n, 82n, 82n, 82n, 82n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [3n, 3n, 198n, 803n, 82n, 82n, 82n, 82n, 82n, 82n],
+            [198n, 803n, 82n, 209n, 753n, 753n, 753n, 753n, 753n, 753n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("gpt_bigcode", () => {
+    describe("GPTBigCodeForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-GPTBigCodeForCausalLM";
+      /** @type {GPTBigCodeForCausalLM} */
+      let model;
+      /** @type {GPT2Tokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await GPTBigCodeForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await GPT2Tokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[258n, 863n, 79n, 79n, 79n, 79n, 79n, 79n, 79n, 79n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 0n, 258n, 863n, 79n, 79n, 79n, 79n, 79n, 79n],
+            [258n, 863n, 79n, 269n, 813n, 832n, 93n, 93n, 93n, 93n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("gpt2", () => {
+    describe("GPT2LMHeadModel", () => {
+      const model_id = "hf-internal-testing/tiny-random-GPT2LMHeadModel";
+      /** @type {GPT2LMHeadModel} */
+      let model;
+      /** @type {GPT2Tokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await GPT2LMHeadModel.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await GPT2Tokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[258n, 863n, 79n, 79n, 79n, 79n, 79n, 79n, 79n, 243n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 0n, 258n, 863n, 79n, 79n, 79n, 79n, 79n, 79n],
+            [258n, 863n, 79n, 269n, 813n, 813n, 813n, 813n, 813n, 813n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("jais", () => {
+    describe("JAISLMHeadModel", () => {
+      const model_id = "onnx-community/tiny-random-jais";
+      /** @type {JAISLMHeadModel} */
+      let model;
+      /** @type {PreTrainedTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await JAISLMHeadModel.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await PreTrainedTokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[55422n, 55422n, 55422n, 55422n, 55422n, 55422n, 55422n, 55422n, 55422n, 55422n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 55422n, 55422n, 55422n, 55422n, 55422n, 55422n, 55422n, 55422n, 55422n],
+            [55422n, 2838n, 2838n, 2838n, 2838n, 2838n, 2838n, 2838n, 2838n, 2838n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("mpt", () => {
+    describe("MptForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-MptForCausalLM";
+      /** @type {MptForCausalLM} */
+      let model;
+      /** @type {GPTNeoXTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await MptForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await GPTNeoXTokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[259n, 864n, 80n, 80n, 80n, 80n, 80n, 80n, 80n, 80n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 0n, 259n, 864n, 80n, 80n, 80n, 80n, 80n, 80n],
+            [259n, 864n, 80n, 270n, 814n, 293n, 293n, 293n, 293n, 293n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("codegen", () => {
+    describe("CodeGenForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-CodeGenForCausalLM";
+      /** @type {CodeGenForCausalLM} */
+      let model;
+      /** @type {CodeGenTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await CodeGenForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await CodeGenTokenizer.from_pretrained(model_id);
+        tokenizer.padding_side = "left";
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[258n, 863n, 79n, 437n, 334n, 450n, 294n, 621n, 375n, 385n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [0n, 0n, 258n, 863n, 79n, 437n, 334n, 450n, 294n, 621n],
+            [258n, 863n, 79n, 269n, 813n, 759n, 113n, 295n, 574n, 987n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+
+  describe("mistral", () => {
+    describe("MistralForCausalLM", () => {
+      const model_id = "hf-internal-testing/tiny-random-MistralForCausalLM";
+      /** @type {MistralForCausalLM} */
+      let model;
+      /** @type {LlamaTokenizer} */
+      let tokenizer;
+      beforeAll(async () => {
+        model = await MistralForCausalLM.from_pretrained(model_id, {
+          // TODO move to config
+          ...DEFAULT_MODEL_OPTIONS,
+        });
+        tokenizer = await LlamaTokenizer.from_pretrained(model_id);
+      }, MAX_MODEL_LOAD_TIME);
+
+      it(
+        "batch_size=1",
+        async () => {
+          const inputs = tokenizer("hello");
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([[1n, 6312n, 28709n, 24704n, 8732n, 1310n, 9808n, 13771n, 27309n, 4779n]]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "batch_size>1",
+        async () => {
+          const inputs = tokenizer(["hello", "hello world"], { padding: true });
+          const outputs = await model.generate({
+            ...inputs,
+            max_length: 10,
+          });
+          expect(outputs.tolist()).toEqual([
+            [2n, 1n, 6312n, 28709n, 24704n, 8732n, 1310n, 9808n, 13771n, 27309n],
+            [1n, 6312n, 28709n, 1526n, 8687n, 5690n, 1770n, 30811n, 12501n, 3325n],
+          ]);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      afterAll(async () => {
+        await model?.dispose();
+      }, MAX_MODEL_DISPOSE_TIME);
+    });
+  });
+});
+
+describe("Tiny random pipelines", () => {
+  describe("fill-mask", () => {
+    const model_id = "hf-internal-testing/tiny-random-BertForMaskedLM";
+
+    /** @type {FillMaskPipeline} */
+    let pipe;
+    beforeAll(async () => {
+      pipe = await pipeline("fill-mask", model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+    }, MAX_MODEL_LOAD_TIME);
+
+    describe("batch_size=1", () => {
+      it("default (top_k=5)", async () => {
+        const output = await pipe("a [MASK] c");
+        const target = [
+          { score: 0.0013377574505284429, token: 854, token_str: "##ο", sequence: "aο c" },
+          { score: 0.001248967950232327, token: 962, token_str: "##ち", sequence: "aち c" },
+          { score: 0.0012304208939895034, token: 933, token_str: "##ع", sequence: "aع c" },
+          { score: 0.0012301815440878272, token: 313, token_str: "ფ", sequence: "a ფ c" },
+          { score: 0.001222139224410057, token: 624, token_str: "未", sequence: "a 未 c" },
+        ];
+        compare(output, target, 1e-5);
+      });
+      it("custom (top_k=2)", async () => {
+        const output = await pipe("a [MASK] c", { top_k: 2 });
+        const target = [
+          { score: 0.0013377574505284429, token: 854, token_str: "##ο", sequence: "aο c" },
+          { score: 0.001248967950232327, token: 962, token_str: "##ち", sequence: "aち c" },
+        ];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    describe("batch_size>1", () => {
+      it("default (top_k=5)", async () => {
+        const output = await pipe(["a [MASK] c", "a b [MASK] c"]);
+        const target = [
+          [
+            { score: 0.0013377574505284429, token: 854, token_str: "##ο", sequence: "aο c" },
+            { score: 0.001248967950232327, token: 962, token_str: "##ち", sequence: "aち c" },
+            { score: 0.0012304208939895034, token: 933, token_str: "##ع", sequence: "aع c" },
+            { score: 0.0012301815440878272, token: 313, token_str: "ფ", sequence: "a ფ c" },
+            { score: 0.001222139224410057, token: 624, token_str: "未", sequence: "a 未 c" },
+          ],
+          [
+            { score: 0.0013287801994010806, token: 962, token_str: "##ち", sequence: "a bち c" },
+            { score: 0.0012486606137827039, token: 823, token_str: "##ن", sequence: "a bن c" },
+            { score: 0.0012320734094828367, token: 1032, token_str: "##ც", sequence: "a bც c" },
+            { score: 0.0012295148335397243, token: 854, token_str: "##ο", sequence: "a bο c" },
+            { score: 0.0012277684872969985, token: 624, token_str: "未", sequence: "a b 未 c" },
+          ],
+        ];
+        compare(output, target, 1e-5);
+      });
+      it("custom (top_k=2)", async () => {
+        const output = await pipe(["a [MASK] c", "a b [MASK] c"], { top_k: 2 });
+        const target = [
+          [
+            { score: 0.0013377574505284429, token: 854, token_str: "##ο", sequence: "aο c" },
+            { score: 0.001248967950232327, token: 962, token_str: "##ち", sequence: "aち c" },
+          ],
+          [
+            { score: 0.0013287801994010806, token: 962, token_str: "##ち", sequence: "a bち c" },
+            { score: 0.0012486606137827039, token: 823, token_str: "##ن", sequence: "a bن c" },
+          ],
+        ];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    afterAll(async () => {
+      await pipe?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+
+  describe("text-classification", () => {
+    const model_id = "hf-internal-testing/tiny-random-BertForSequenceClassification";
+
+    /** @type {TextClassificationPipeline} */
+    let pipe;
+    beforeAll(async () => {
+      pipe = await pipeline("text-classification", model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+    }, MAX_MODEL_LOAD_TIME);
+
+    describe("batch_size=1", () => {
+      it("default (top_k=1)", async () => {
+        const output = await pipe("a");
+        const target = [{ label: "LABEL_0", score: 0.5076976418495178 }];
+        compare(output, target, 1e-5);
+      });
+      it("custom (top_k=2)", async () => {
+        const output = await pipe("a", { top_k: 2 });
+        const target = [
+          { label: "LABEL_0", score: 0.5076976418495178 },
+          { label: "LABEL_1", score: 0.49230238795280457 },
+        ];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    describe("batch_size>1", () => {
+      it("default (top_k=1)", async () => {
+        const output = await pipe(["a", "b c"]);
+        const target = [
+          { label: "LABEL_0", score: 0.5076976418495178 },
+          { label: "LABEL_0", score: 0.5077522993087769 },
+        ];
+        compare(output, target, 1e-5);
+      });
+      it("custom (top_k=2)", async () => {
+        const output = await pipe(["a", "b c"], { top_k: 2 });
+        const target = [
+          [
+            { label: "LABEL_0", score: 0.5076976418495178 },
+            { label: "LABEL_1", score: 0.49230238795280457 },
+          ],
+          [
+            { label: "LABEL_0", score: 0.5077522993087769 },
+            { label: "LABEL_1", score: 0.49224773049354553 },
+          ],
+        ];
+        compare(output, target, 1e-5);
+      });
+
+      it("multi_label_classification", async () => {
+        const problem_type = pipe.model.config.problem_type;
+        pipe.model.config.problem_type = "multi_label_classification";
+
+        const output = await pipe(["a", "b c"], { top_k: 2 });
+        const target = [
+          [
+            { label: "LABEL_0", score: 0.5001373887062073 },
+            { label: "LABEL_1", score: 0.49243971705436707 },
+          ],
+          [
+            { label: "LABEL_0", score: 0.5001326203346252 },
+            { label: "LABEL_1", score: 0.492380291223526 },
+          ],
+        ];
+        compare(output, target, 1e-5);
+
+        // Reset problem type
+        pipe.model.config.problem_type = problem_type;
+      });
+    });
+
+    afterAll(async () => {
+      await pipe?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+
+  describe("token-classification", () => {
+    const model_id = "hf-internal-testing/tiny-random-BertForTokenClassification";
+
+    /** @type {TokenClassificationPipeline} */
+    let pipe;
+    beforeAll(async () => {
+      pipe = await pipeline("token-classification", model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+    }, MAX_MODEL_LOAD_TIME);
+
+    describe("batch_size=1", () => {
+      it("default", async () => {
+        const output = await pipe("1 2 3");
+
+        // TODO: Add start/end to target
+        const target = [
+          {
+            entity: "LABEL_0",
+            score: 0.5292708,
+            index: 1,
+            word: "1",
+            // 'start': 0, 'end': 1
+          },
+          {
+            entity: "LABEL_0",
+            score: 0.5353687,
+            index: 2,
+            word: "2",
+            // 'start': 2, 'end': 3
+          },
+          {
+            entity: "LABEL_1",
+            score: 0.51381934,
+            index: 3,
+            word: "3",
+            // 'start': 4, 'end': 5
+          },
+        ];
+        compare(output, target, 1e-5);
+      });
+      it("custom (ignore_labels set)", async () => {
+        const output = await pipe("1 2 3", { ignore_labels: ["LABEL_0"] });
+        const target = [
+          {
+            entity: "LABEL_1",
+            score: 0.51381934,
+            index: 3,
+            word: "3",
+            // 'start': 4, 'end': 5
+          },
+        ];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    describe("batch_size>1", () => {
+      it("default", async () => {
+        const output = await pipe(["1 2 3", "4 5"]);
+        const target = [
+          [
+            {
+              entity: "LABEL_0",
+              score: 0.5292708,
+              index: 1,
+              word: "1",
+              // 'start': 0, 'end': 1
+            },
+            {
+              entity: "LABEL_0",
+              score: 0.5353687,
+              index: 2,
+              word: "2",
+              // 'start': 2, 'end': 3
+            },
+            {
+              entity: "LABEL_1",
+              score: 0.51381934,
+              index: 3,
+              word: "3",
+              // 'start': 4, 'end': 5
+            },
+          ],
+          [
+            {
+              entity: "LABEL_0",
+              score: 0.5432807,
+              index: 1,
+              word: "4",
+              // 'start': 0, 'end': 1
+            },
+            {
+              entity: "LABEL_1",
+              score: 0.5007693,
+              index: 2,
+              word: "5",
+              // 'start': 2, 'end': 3
+            },
+          ],
+        ];
+        compare(output, target, 1e-5);
+      });
+      it("custom (ignore_labels set)", async () => {
+        const output = await pipe(["1 2 3", "4 5"], { ignore_labels: ["LABEL_0"] });
+        const target = [
+          [
+            {
+              entity: "LABEL_1",
+              score: 0.51381934,
+              index: 3,
+              word: "3",
+              // 'start': 4, 'end': 5
+            },
+          ],
+          [
+            {
+              entity: "LABEL_1",
+              score: 0.5007693,
+              index: 2,
+              word: "5",
+              // 'start': 2, 'end': 3
+            },
+          ],
+        ];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    afterAll(async () => {
+      await pipe?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+
+  describe("question-answering", () => {
+    const model_id = "hf-internal-testing/tiny-random-BertForQuestionAnswering";
+
+    /** @type {QuestionAnsweringPipeline} */
+    let pipe;
+    beforeAll(async () => {
+      pipe = await pipeline("question-answering", model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+    }, MAX_MODEL_LOAD_TIME);
+
+    describe("batch_size=1", () => {
+      it("default (top_k=1)", async () => {
+        const output = await pipe("a", "b c");
+        const target = { score: 0.11395696550607681, /* start: 0, end: 1, */ answer: "b" };
+        compare(output, target, 1e-5);
+      });
+      it("custom (top_k=3)", async () => {
+        const output = await pipe("a", "b c", { top_k: 3 });
+        const target = [
+          { score: 0.11395696550607681, /* start: 0, end: 1, */ answer: "b" },
+          { score: 0.11300431191921234, /* start: 2, end: 3, */ answer: "c" },
+          { score: 0.10732574015855789, /* start: 0, end: 3, */ answer: "b c" },
+        ];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    afterAll(async () => {
+      await pipe?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+
+  describe("image-classification", () => {
+    const model_id = "hf-internal-testing/tiny-random-vit";
+    const urls = ["https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/white-image.png", "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/blue-image.png"];
+
+    /** @type {ImageClassificationPipeline} */
+    let pipe;
+    beforeAll(async () => {
+      pipe = await pipeline("image-classification", model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+    }, MAX_MODEL_LOAD_TIME);
+
+    describe("batch_size=1", () => {
+      it("default (top_k=5)", async () => {
+        const output = await pipe(urls[0]);
+        const target = [
+          { label: "LABEL_1", score: 0.5020533800125122 },
+          { label: "LABEL_0", score: 0.4979466497898102 },
+        ];
+        compare(output, target, 1e-5);
+      });
+      it("custom (top_k=1)", async () => {
+        const output = await pipe(urls[0], { top_k: 1 });
+        const target = [{ label: "LABEL_1", score: 0.5020533800125122 }];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    describe("batch_size>1", () => {
+      it("default (top_k=5)", async () => {
+        const output = await pipe(urls);
+        const target = [
+          [
+            { label: "LABEL_1", score: 0.5020533800125122 },
+            { label: "LABEL_0", score: 0.4979466497898102 },
+          ],
+          [
+            { label: "LABEL_1", score: 0.519227921962738 },
+            { label: "LABEL_0", score: 0.4807720482349396 },
+          ],
+        ];
+        compare(output, target, 1e-5);
+      });
+      it("custom (top_k=1)", async () => {
+        const output = await pipe(urls, { top_k: 1 });
+        const target = [[{ label: "LABEL_1", score: 0.5020533800125122 }], [{ label: "LABEL_1", score: 0.519227921962738 }]];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    afterAll(async () => {
+      await pipe?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+
+  describe("zero-shot-image-classification", () => {
+    const model_id = "hf-internal-testing/tiny-random-GroupViTModel";
+
+    // Example adapted from https://huggingface.co/docs/transformers/en/model_doc/groupvit
+    const urls = ["https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/white-image.png", "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/blue-image.png"];
+    const labels = ["cat", "dog"];
+    const hypothesis_template = "a photo of a {}";
+
+    /** @type {ZeroShotImageClassificationPipeline} */
+    let pipe;
+    beforeAll(async () => {
+      pipe = await pipeline("zero-shot-image-classification", model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+    }, MAX_MODEL_LOAD_TIME);
+
+    describe("batch_size=1", () => {
+      it("default", async () => {
+        const output = await pipe(urls[0], labels);
+        const target = [
+          { score: 0.5990662574768066, label: "cat" },
+          { score: 0.40093377232551575, label: "dog" },
+        ];
+        compare(output, target, 1e-5);
+      });
+      it("custom (w/ hypothesis_template)", async () => {
+        const output = await pipe(urls[0], labels, { hypothesis_template });
+        const target = [
+          { score: 0.5527022480964661, label: "cat" },
+          { score: 0.44729775190353394, label: "dog" },
+        ];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    describe("batch_size>1", () => {
+      it("default", async () => {
+        const output = await pipe(urls, labels);
+        const target = [
+          [
+            { score: 0.5990662574768066, label: "cat" },
+            { score: 0.40093377232551575, label: "dog" },
+          ],
+          [
+            { score: 0.5006340146064758, label: "dog" },
+            { score: 0.49936598539352417, label: "cat" },
+          ],
+        ];
+        compare(output, target, 1e-5);
+      });
+      it("custom (w/ hypothesis_template)", async () => {
+        const output = await pipe(urls, labels, { hypothesis_template });
+        const target = [
+          [
+            { score: 0.5527022480964661, label: "cat" },
+            { score: 0.44729775190353394, label: "dog" },
+          ],
+          [
+            { score: 0.5395973324775696, label: "cat" },
+            { score: 0.46040263772010803, label: "dog" },
+          ],
+        ];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    afterAll(async () => {
+      await pipe?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+
+  describe("audio-classification", () => {
+    const model_id = "hf-internal-testing/tiny-random-unispeech";
+    const audios = [new Float32Array(16000).fill(0), Float32Array.from({ length: 16000 }, (_, i) => i)];
+
+    /** @type {ImageClassificationPipeline} */
+    let pipe;
+    beforeAll(async () => {
+      pipe = await pipeline("audio-classification", model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+    }, MAX_MODEL_LOAD_TIME);
+
+    describe("batch_size=1", () => {
+      it("default (top_k=5)", async () => {
+        const output = await pipe(audios[0]);
+        const target = [
+          { score: 0.5043687224388123, label: "LABEL_0" },
+          { score: 0.4956313371658325, label: "LABEL_1" },
+        ];
+        compare(output, target, 1e-5);
+      });
+      it("custom (top_k=1)", async () => {
+        const output = await pipe(audios[0], { top_k: 1 });
+        const target = [{ score: 0.5043687224388123, label: "LABEL_0" }];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    describe("batch_size>1", () => {
+      it("default (top_k=5)", async () => {
+        const output = await pipe(audios);
+        const target = [
+          [
+            { score: 0.5043687224388123, label: "LABEL_0" },
+            { score: 0.4956313371658325, label: "LABEL_1" },
+          ],
+          [
+            { score: 0.5187293887138367, label: "LABEL_0" },
+            { score: 0.4812707006931305, label: "LABEL_1" },
+          ],
+        ];
+        compare(output, target, 1e-5);
+      });
+      it("custom (top_k=1)", async () => {
+        const output = await pipe(audios, { top_k: 1 });
+        const target = [[{ score: 0.5043687224388123, label: "LABEL_0" }], [{ score: 0.5187293887138367, label: "LABEL_0" }]];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    afterAll(async () => {
+      await pipe?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+
+  describe("text-generation", () => {
+    const model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM";
+
+    /** @type {TextGenerationPipeline} */
+    let pipe;
+    beforeAll(async () => {
+      pipe = await pipeline("text-generation", model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+    }, MAX_MODEL_LOAD_TIME);
+
+    describe("batch_size=1", () => {
+      const text_input = "hello";
+      const generated_text_target = "erdingsAndroid Load";
+      const text_target = [{ generated_text: text_input + generated_text_target }];
+      const new_text_target = [{ generated_text: generated_text_target }];
+
+      const chat_input = [
+        { role: "system", content: "a" },
+        { role: "user", content: "b" },
+      ];
+      const chat_target = [
+        {
+          generated_text: [
+            { role: "system", content: "a" },
+            { role: "user", content: "b" },
+            { role: "assistant", content: " Southern abund Load" },
+          ],
+        },
+      ];
+
+      it("text input (single)", async () => {
+        const output = await pipe(text_input, { max_new_tokens: 3 });
+        compare(output, text_target);
+      });
+      it("text input (list)", async () => {
+        const output = await pipe([text_input], { max_new_tokens: 3 });
+        compare(output, [text_target]);
+      });
+
+      it("text input (single) - return_full_text=false", async () => {
+        const output = await pipe(text_input, { max_new_tokens: 3, return_full_text: false });
+        compare(output, new_text_target);
+      });
+      it("text input (list) - return_full_text=false", async () => {
+        const output = await pipe([text_input], { max_new_tokens: 3, return_full_text: false });
+        compare(output, [new_text_target]);
+      });
+
+      it("chat input (single)", async () => {
+        const output = await pipe(chat_input, { max_new_tokens: 3 });
+        compare(output, chat_target);
+      });
+      it("chat input (list)", async () => {
+        const output = await pipe([chat_input], { max_new_tokens: 3 });
+        compare(output, [chat_target]);
+      });
+    });
+
+    // TODO: Fix batch_size>1
+    // describe('batch_size>1', () => {
+    //     it('default', async () => {
+    //         const output = await pipe(['hello', 'hello world']);
+    //         const target = [
+    //            [{generated_text: 'helloerdingsAndroid Load'}],
+    //            [{generated_text: 'hello world zerosMillнал'}],
+    //         ];
+    //         compare(output, target);
+    //     });
+    // });
+
+    afterAll(async () => {
+      await pipe?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+
+  describe("translation", () => {
+    const model_id = "Xenova/tiny-random-M2M100ForConditionalGeneration";
+
+    /** @type {TextGenerationPipeline} */
+    let pipe;
+    beforeAll(async () => {
+      pipe = await pipeline("translation", model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+    }, MAX_MODEL_LOAD_TIME);
+
+    describe("batch_size=1", () => {
+      it("default", async () => {
+        const text = "जीवन एक चॉकलेट बॉक्स की तरह है।";
+        const output = await pipe(text, {
+          src_lang: "hi",
+          tgt_lang: "fr",
+          max_new_tokens: 5,
+        });
+        const target = [{ translation_text: "Slovenska төсли төсли төсли" }];
+        compare(output, target);
+      });
+    });
+
+    afterAll(async () => {
+      await pipe?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+
+  describe("object-detection", () => {
+    const model_id = "hf-internal-testing/tiny-random-DetrForObjectDetection";
+    const urls = ["https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/white-image.png", "https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/blue-image.png"];
+
+    /** @type {ImageClassificationPipeline} */
+    let pipe;
+    beforeAll(async () => {
+      pipe = await pipeline("object-detection", model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+    }, MAX_MODEL_LOAD_TIME);
+
+    describe("batch_size=1", () => {
+      it("default (threshold unset)", async () => {
+        const output = await pipe(urls[0]);
+        const target = [];
+        compare(output, target, 1e-5);
+      });
+      it("default (threshold=0)", async () => {
+        const output = await pipe(urls[0], { threshold: 0 });
+        const target = [
+          { score: 0.020360443741083145, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+          { score: 0.020360419526696205, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+          { score: 0.02036038413643837, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+          { score: 0.020360447466373444, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+          { score: 0.020360389724373817, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+          { score: 0.020360423251986504, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+          { score: 0.02036040835082531, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+          { score: 0.020360363647341728, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+          { score: 0.020360389724373817, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+          { score: 0.020360389724373817, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+          { score: 0.020360343158245087, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+          { score: 0.020360423251986504, label: "LABEL_31", box: { xmin: 56, ymin: 55, xmax: 169, ymax: 167 } },
+        ];
+        compare(output, target, 1e-5);
+      });
+    });
+
+    // TODO: Add batched support to object detection pipeline
+    // describe('batch_size>1', () => {
+    //     it('default (threshold unset)', async () => {
+    //         const output = await pipe(urls);
+    //         console.log(output);
+    //         const target = [];
+    //         compare(output, target, 1e-5);
+    //     });
+    //     it('default (threshold=0)', async () => {
+    //         const output = await pipe(urls, { threshold: 0 });
+    //         console.log(output);
+    //         const target = [];
+    //         compare(output, target, 1e-5);
+    //     });
+    // });
+
+    afterAll(async () => {
+      await pipe?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+});
+
+describe("PKV caching", () => {
+  describe("LlamaForCausalLM", () => {
+    const model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM";
+    /** @type {LlamaForCausalLM} */
+    let model;
+    /** @type {LlamaTokenizer} */
+    let tokenizer;
+    beforeAll(async () => {
+      model = await LlamaForCausalLM.from_pretrained(model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+      tokenizer = await LlamaTokenizer.from_pretrained(model_id);
+    }, MAX_MODEL_LOAD_TIME);
+
+    it(
+      "batch_size=1",
+      async () => {
+        const inputs = tokenizer("1");
+
+        // Generate first sequence w/o PKV
+        // NOTE: `return_dict_in_generate=true` is required to get PKV
+        const { past_key_values, sequences } = await model.generate({
+          ...inputs,
+          max_new_tokens: 5,
+          do_sample: false,
+          return_dict_in_generate: true,
+        });
+
+        // Update output with new text
+        const decoded = tokenizer.batch_decode(sequences, {
+          skip_special_tokens: false,
+        })[0];
+        const new_inputs = tokenizer(decoded + "2", {
+          add_special_tokens: false,
+        });
+
+        // Run w/o PKV
+        const generated_ids = await model.generate({
+          ...new_inputs,
+          max_new_tokens: 3,
+          do_sample: false,
+        });
+
+        // Run w/ PKV
+        const generated_ids_pkv = await model.generate({
+          ...new_inputs,
+          past_key_values,
+          max_new_tokens: 3,
+          do_sample: false,
+        });
+
+        const target = [[1n, 259n, 29896n, 24959n, 22063n, 17192n, 12189n, 22468n, 29906n, 3399n, 24823n, 26470n]];
+
+        expect(generated_ids.tolist()).toEqual(target);
+        expect(generated_ids_pkv.tolist()).toEqual(target);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    afterAll(async () => {
+      await model?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+
+  describe("LlavaForConditionalGeneration", () => {
+    const model_id = "Xenova/tiny-random-LlavaForConditionalGeneration";
+    /** @type {LlavaForConditionalGeneration} */
+    let model;
+    /** @type {PreTrainedTokenizer} */
+    let tokenizer;
+    /** @type {Processor} */
+    let processor;
+    beforeAll(async () => {
+      model = await LlavaForConditionalGeneration.from_pretrained(model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+      tokenizer = await AutoTokenizer.from_pretrained(model_id);
+      processor = await AutoProcessor.from_pretrained(model_id);
+    }, MAX_MODEL_LOAD_TIME);
+
+    it(
+      "batch_size=1",
+      async () => {
+        const text_inputs = tokenizer("<image>hello");
+
+        // Empty white image
+        const dims = [224, 224, 3];
+        const image = new RawImage(new Uint8ClampedArray(dims[0] * dims[1] * dims[2]).fill(255), ...dims);
+        const vision_inputs = await processor(image);
+
+        // Generate first sequence w/o PKV
+        // NOTE: `return_dict_in_generate=true` is required to get PKV
+        const { past_key_values, sequences } = await model.generate({
+          ...text_inputs,
+          ...vision_inputs,
+          max_new_tokens: 5,
+          do_sample: false,
+          return_dict_in_generate: true,
+        });
+
+        // Update output with new text
+        const decoded = tokenizer.batch_decode(sequences).map((x) => x + "new");
+        const new_inputs = tokenizer(decoded, {
+          add_special_tokens: false,
+        });
+
+        // Run w/o PKV
+        const generated_ids = await model.generate({
+          ...new_inputs,
+          ...vision_inputs,
+          max_new_tokens: 3,
+          do_sample: false,
+        });
+
+        // Run w/ PKV
+        const generated_ids_pkv = await model.generate({
+          ...new_inputs,
+          past_key_values,
+          max_new_tokens: 3,
+          do_sample: false,
+        });
+
+        const target = [[1n, 32000n, 29871n, 23927n, 359n, 1519n, 568n, 5769n, 1330n, 21544n, 11568n, 1482n, 7258n, 1250n, 16117n]];
+        expect(generated_ids.tolist()).toEqual(target);
+        expect(generated_ids_pkv.tolist()).toEqual(target);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    afterAll(async () => {
+      await model?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+});
diff --git a/tests/tokenizers.test.js b/tests/tokenizers.test.js
index 3b0cfe7a7..2d9fc0e52 100644
--- a/tests/tokenizers.test.js
+++ b/tests/tokenizers.test.js
@@ -1,498 +1,502 @@
-
-
-import { AutoTokenizer } from '../src/transformers.js';
-import { getFile } from '../src/utils/hub.js';
-import { m, MAX_TEST_EXECUTION_TIME } from './init.js';
-import { compare } from './test_utils.js';
-
-// Load test data generated by the python tests
-// TODO do this dynamically?
-const { tokenization, templates } = await (await getFile('./tests/data/tokenizer_tests.json')).json()
-
-// Dynamic tests to ensure transformers.js (JavaScript) matches transformers (Python)
-describe('Tokenizers (dynamic)', () => {
-
-    for (const [tokenizerName, tests] of Object.entries(tokenization)) {
-
-        it(tokenizerName, async () => {
-            const tokenizer = await AutoTokenizer.from_pretrained(m(tokenizerName));
-
-            for (const test of tests) {
-                // Two kinds of tests:
-                // 1. text w/o text_pair
-                // 2. text w text_pair
-
-                if (typeof test.input === 'string') {
-
-                    // Test encoding
-                    const encoded = tokenizer(test.input, {
-                        return_tensor: false
-                    });
-
-                    // Add the input text to the encoded object for easier debugging
-                    test.encoded.input = encoded.input = test.input;
-
-                    expect(encoded).toEqual(test.encoded);
-
-                    // Skip decoding tests if encoding produces zero tokens
-                    if (test.encoded.input_ids.length === 0) continue;
-
-                    // Test decoding
-                    const decoded_with_special = tokenizer.decode(encoded.input_ids, { skip_special_tokens: false });
-                    expect(decoded_with_special).toEqual(test.decoded_with_special);
-
-                    const decoded_without_special = tokenizer.decode(encoded.input_ids, { skip_special_tokens: true });
-                    expect(decoded_without_special).toEqual(test.decoded_without_special);
-
-                } else {
-                    const { text, text_pair } = test.input;
-                    const encoded = tokenizer(text, {
-                        text_pair,
-                        return_tensor: false,
-                    });
-                    compare(encoded, test.output);
-                }
-            }
-        }, MAX_TEST_EXECUTION_TIME);
-    }
+import { AutoTokenizer } from "../src/tokenizers.js";
+import * as TOKENIZER_TESTS from "./models/all_tokenization_tests.js";
+
+import { compare } from "./test_utils.js";
+
+const MAX_LOAD_TIME = 10_000;
+const MAX_EXECUTION_TIME = 10_000;
+
+describe("Tokenizers (model-specific)", () => {
+  for (const [tokenizer_name, { TOKENIZER_CLASS, TEST_CONFIG, CUSTOM_TESTS }] of Object.entries(TOKENIZER_TESTS)) {
+    describe(tokenizer_name, () => {
+      for (const model_id in TEST_CONFIG) {
+        describe(model_id, () => {
+          /** @type {import('../src/tokenizers.js').PreTrainedTokenizer} */
+          let tokenizer;
+          beforeAll(async () => {
+            tokenizer = await TOKENIZER_CLASS.from_pretrained(model_id);
+          }, MAX_LOAD_TIME);
+
+          for (const [test_name, test_case] of Object.entries(TEST_CONFIG[model_id])) {
+            test(test_name, () => {
+              if (test_case.ids) {
+                const ids = tokenizer.encode(test_case.text, {
+                  text_pair: test_case.text_pair,
+                });
+                expect(ids).toEqual(test_case.ids);
+              }
+              if (test_case.tokens) {
+                const tokens = tokenizer.tokenize(test_case.text, {
+                  pair: test_case.text_pair,
+                });
+                expect(tokens).toEqual(test_case.tokens);
+              }
+              if (test_case.decoded) {
+                const decoded = tokenizer.decode(test_case.ids);
+                expect(decoded).toEqual(test_case.decoded);
+              }
+            });
+          }
+        });
+      }
+      // Run custom tests, if they exist
+      CUSTOM_TESTS && describe("custom", CUSTOM_TESTS);
+    });
+  }
 });
 
-// Tests to ensure that no matter what, the correct tokenization is returned.
-// This is necessary since there are sometimes bugs in the transformers library.
-describe('Tokenizers (hard-coded)', () => {
-    const TESTS = {
-        'Xenova/llama-tokenizer': [ // Test legacy compatibility
-            {
-                // legacy unset => legacy=true
-                // NOTE: While incorrect, it is necessary to match legacy behaviour
-                data: {
-                    "<s>\n": [1, 29871, 13],
-                },
-                legacy: null,
-            },
-            {
-                // override legacy=true (same results as above)
-                data: {
-                    "<s>\n": [1, 29871, 13],
-                },
-                legacy: true,
-            },
-            {
-                // override legacy=false (fixed results)
-                data: {
-                    "<s>\n": [1, 13],
-                },
-                legacy: false,
-            }
+describe("Tokenizer padding/truncation", () => {
+  const inputs = ["a", "b c"];
+  const text_pair = ["d e", "f g h"];
+
+  it("should create a jagged array", async () => {
+    const tokenizer = await AutoTokenizer.from_pretrained("Xenova/bert-base-uncased");
+
+    {
+      // support jagged array if `return_tensor=false`
+      const output = tokenizer(inputs, {
+        return_tensor: false,
+      });
+      const expected = {
+        input_ids: [
+          [101, 1037, 102],
+          [101, 1038, 1039, 102],
         ],
-
-        'Xenova/llama-tokenizer_new': [ // legacy=false
-            {
-                data: {
-                    " </s> 1  2   3    4   ": [259, 2, 29871, 29896, 259, 29906, 1678, 29941, 268, 29946, 1678],
-                    "<s>\n": [1, 13],
-                    "</s>test</s>": [2, 1688, 2],
-                    " </s> test </s> ": [259, 2, 1243, 29871, 2, 29871],
-                    "A\n'll": [319, 13, 29915, 645],
-                    "Hey </s>. how are you": [18637, 29871, 2, 29889, 920, 526, 366],
-                    "  Hi  Hello  ": [259, 6324, 29871, 15043, 259],
-                },
-                reversible: true,
-                legacy: null,
-            },
-            { // override legacy=true (incorrect results, but necessary to match legacy behaviour)
-                data: {
-                    "<s>\n": [1, 29871, 13],
-                },
-                legacy: true,
-            },
+        attention_mask: [
+          [1, 1, 1],
+          [1, 1, 1, 1],
         ],
-
-        // legacy=false
-        'Xenova/t5-tokenizer-new': [
-            {
-                data: {
-                    // https://github.com/huggingface/transformers/pull/26678
-                    // ['▁Hey', '▁', '</s>', '.', '▁how', '▁are', '▁you']
-                    "Hey </s>. how are you": [9459, 3, 1, 5, 149, 33, 25],
-                },
-                reversible: true,
-                legacy: null,
-            },
-            {
-                data: {
-                    "</s>\n": [1, 3],
-                    "A\n'll": [71, 3, 31, 195],
-                },
-                reversible: false,
-                legacy: null,
-            }
+        token_type_ids: [
+          [0, 0, 0],
+          [0, 0, 0, 0],
         ],
+      };
+      compare(output, expected);
     }
 
-    // Re-use the same tests for the llama2 tokenizer
-    TESTS['Xenova/llama2-tokenizer'] = TESTS['Xenova/llama-tokenizer_new'];
-
-    for (const [tokenizerName, test_data] of Object.entries(TESTS)) {
-
-        it(tokenizerName, async () => {
-            for (const { data, reversible, legacy } of test_data) {
-                const tokenizer = await AutoTokenizer.from_pretrained(m(tokenizerName), { legacy });
-
-                for (const [text, expected] of Object.entries(data)) {
-                    const token_ids = tokenizer.encode(text, null, { add_special_tokens: false });
-                    expect(token_ids).toEqual(expected);
-
-                    // If reversible, test that decoding produces the original text
-                    if (reversible) {
-                        const decoded = tokenizer.decode(token_ids);
-                        expect(decoded).toEqual(text);
-                    }
-                }
-            }
-        }, MAX_TEST_EXECUTION_TIME);
+    {
+      const output = tokenizer(inputs, {
+        return_tensor: false,
+        truncation: true,
+        add_special_tokens: false,
+      });
+      const expected = {
+        input_ids: [[1037], [1038, 1039]],
+        attention_mask: [[1], [1, 1]],
+        token_type_ids: [[0], [0, 0]],
+      };
+      compare(output, expected);
     }
-});
+  });
+
+  it(
+    "should create a tensor",
+    async () => {
+      const tokenizer = await AutoTokenizer.from_pretrained("Xenova/bert-base-uncased");
+
+      {
+        // Expected to throw error if jagged array
+        expect(() => tokenizer(inputs)).toThrowError("Unable to create tensor");
+      }
+
+      {
+        // Truncation
+        const { input_ids, attention_mask, token_type_ids } = tokenizer(inputs, {
+          truncation: true,
+          max_length: 1,
+          add_special_tokens: false,
+        });
 
-describe('Tokenizer padding/truncation', () => {
-    const inputs = ['a', 'b c'];
-    const text_pair = ['d e', 'f g h'];
-
-    it('should create a jagged array', async () => {
-        const tokenizer = await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased');
-
-        { // support jagged array if `return_tensor=false`
-            const output = tokenizer(inputs, {
-                return_tensor: false,
-            })
-            const expected = {
-                input_ids: [[101, 1037, 102], [101, 1038, 1039, 102]],
-                attention_mask: [[1, 1, 1], [1, 1, 1, 1]],
-                token_type_ids: [[0, 0, 0], [0, 0, 0, 0]]
-            }
-            compare(output, expected);
-        }
+        expect(input_ids.tolist()).toEqual([[1037n], [1038n]]);
+        expect(attention_mask.tolist()).toEqual([[1n], [1n]]);
+        expect(token_type_ids.tolist()).toEqual([[0n], [0n]]);
+      }
+      {
+        // Truncation w/ text pair
+        // TODO
+      }
+
+      {
+        // Padding
+        const { input_ids, attention_mask, token_type_ids } = tokenizer(inputs, {
+          padding: true,
+          add_special_tokens: false,
+        });
 
-        {
-            const output = tokenizer(inputs, {
-                return_tensor: false,
-                truncation: true,
-                add_special_tokens: false,
-            })
-            const expected = {
-                input_ids: [[1037], [1038, 1039]],
-                attention_mask: [[1], [1, 1]],
-                token_type_ids: [[0], [0, 0]]
-            }
-            compare(output, expected);
-        }
-    })
-
-    it('should create a tensor', async () => {
-        const tokenizer = await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased');
-
-        { // Expected to throw error if jagged array
-            expect(() => tokenizer(inputs)).toThrowError('Unable to create tensor');
-        }
-
-        { // Truncation
-            const { input_ids, attention_mask, token_type_ids } = tokenizer(inputs, {
-                truncation: true,
-                max_length: 1,
-                add_special_tokens: false,
-            })
-
-            expect(input_ids.tolist()).toEqual([[1037n], [1038n]])
-            expect(attention_mask.tolist()).toEqual([[1n], [1n]])
-            expect(token_type_ids.tolist()).toEqual([[0n], [0n]])
-        }
-        { // Truncation w/ text pair
-            // TODO
-        }
-
-        { // Padding
-            const { input_ids, attention_mask, token_type_ids } = tokenizer(inputs, {
-                padding: true,
-                add_special_tokens: false,
-            })
-
-            expect(input_ids.tolist()).toEqual([[1037n, 0n], [1038n, 1039n]])
-            expect(attention_mask.tolist()).toEqual([[1n, 0n], [1n, 1n]])
-            expect(token_type_ids.tolist()).toEqual([[0n, 0n], [0n, 0n]])
-        }
-        { // Padding w/ text pair
-            const { input_ids, attention_mask, token_type_ids } = tokenizer(inputs, {
-                text_pair,
-                padding: true,
-                add_special_tokens: false,
-            })
-
-            expect(input_ids.tolist()).toEqual([
-                [1037n, 1040n, 1041n, 0n, 0n],
-                [1038n, 1039n, 1042n, 1043n, 1044n],
-            ]);
-            expect(attention_mask.tolist()).toEqual([
-                [1n, 1n, 1n, 0n, 0n],
-                [1n, 1n, 1n, 1n, 1n],
-            ]);
-            expect(token_type_ids.tolist()).toEqual([
-                [0n, 1n, 1n, 0n, 0n],
-                [0n, 0n, 1n, 1n, 1n],
-            ]);
-        }
-
-        { // Truncation + padding
-            const { input_ids, attention_mask, token_type_ids } = tokenizer(['a', 'b c', 'd e f'], {
-                padding: true,
-                truncation: true,
-                add_special_tokens: false,
-                max_length: 2,
-            })
-
-            expect(input_ids.tolist()).toEqual([[1037n, 0n], [1038n, 1039n], [1040n, 1041n]])
-            expect(attention_mask.tolist()).toEqual([[1n, 0n], [1n, 1n], [1n, 1n]])
-            expect(token_type_ids.tolist()).toEqual([[0n, 0n], [0n, 0n], [0n, 0n]])
-        }
-    }, MAX_TEST_EXECUTION_TIME);
-});
+        expect(input_ids.tolist()).toEqual([
+          [1037n, 0n],
+          [1038n, 1039n],
+        ]);
+        expect(attention_mask.tolist()).toEqual([
+          [1n, 0n],
+          [1n, 1n],
+        ]);
+        expect(token_type_ids.tolist()).toEqual([
+          [0n, 0n],
+          [0n, 0n],
+        ]);
+      }
+      {
+        // Padding w/ text pair
+        const { input_ids, attention_mask, token_type_ids } = tokenizer(inputs, {
+          text_pair,
+          padding: true,
+          add_special_tokens: false,
+        });
 
-describe('Token type ids', () => {
-    it('should correctly add token type ids', async () => {
-        const tokenizer = await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased');
+        expect(input_ids.tolist()).toEqual([
+          [1037n, 1040n, 1041n, 0n, 0n],
+          [1038n, 1039n, 1042n, 1043n, 1044n],
+        ]);
+        expect(attention_mask.tolist()).toEqual([
+          [1n, 1n, 1n, 0n, 0n],
+          [1n, 1n, 1n, 1n, 1n],
+        ]);
+        expect(token_type_ids.tolist()).toEqual([
+          [0n, 1n, 1n, 0n, 0n],
+          [0n, 0n, 1n, 1n, 1n],
+        ]);
+      }
+
+      {
+        // Truncation + padding
+        const { input_ids, attention_mask, token_type_ids } = tokenizer(["a", "b c", "d e f"], {
+          padding: true,
+          truncation: true,
+          add_special_tokens: false,
+          max_length: 2,
+        });
 
-        const model_inputs = tokenizer(
-            ['a b c', 'd'],
-            {
-                text_pair: ['e f', 'g h'],
-                padding: true,
-                truncation: true,
-                return_tensor: false,
-            }
-        );
+        expect(input_ids.tolist()).toEqual([
+          [1037n, 0n],
+          [1038n, 1039n],
+          [1040n, 1041n],
+        ]);
+        expect(attention_mask.tolist()).toEqual([
+          [1n, 0n],
+          [1n, 1n],
+          [1n, 1n],
+        ]);
+        expect(token_type_ids.tolist()).toEqual([
+          [0n, 0n],
+          [0n, 0n],
+          [0n, 0n],
+        ]);
+      }
+    },
+    MAX_EXECUTION_TIME,
+  );
+});
 
+describe("Token type ids", () => {
+  it(
+    "should correctly add token type ids",
+    async () => {
+      const tokenizer = await AutoTokenizer.from_pretrained("Xenova/bert-base-uncased");
+
+      const model_inputs = tokenizer(["a b c", "d"], {
+        text_pair: ["e f", "g h"],
+        padding: true,
+        truncation: true,
+        return_tensor: false,
+      });
+
+      const expected = {
+        input_ids: [
+          [101, 1037, 1038, 1039, 102, 1041, 1042, 102],
+          [101, 1040, 102, 1043, 1044, 102, 0, 0],
+        ],
+        token_type_ids: [
+          [0, 0, 0, 0, 0, 1, 1, 1],
+          [0, 0, 0, 1, 1, 1, 0, 0],
+        ],
+        attention_mask: [
+          [1, 1, 1, 1, 1, 1, 1, 1],
+          [1, 1, 1, 1, 1, 1, 0, 0],
+        ],
+      };
+
+      compare(model_inputs, expected);
+    },
+    MAX_EXECUTION_TIME,
+  );
+
+  it(
+    "should add token type ids if user requests them",
+    async () => {
+      const tokenizer = await AutoTokenizer.from_pretrained("Xenova/llama3-tokenizer-new");
+
+      {
+        // Without text pair
+        const model_inputs = tokenizer("hello", {
+          return_tensor: false,
+          return_token_type_ids: true,
+        });
         const expected = {
-            input_ids: [
-                [101, 1037, 1038, 1039, 102, 1041, 1042, 102],
-                [101, 1040, 102, 1043, 1044, 102, 0, 0],
-            ],
-            token_type_ids: [
-                [0, 0, 0, 0, 0, 1, 1, 1],
-                [0, 0, 0, 1, 1, 1, 0, 0],
-            ],
-            attention_mask: [
-                [1, 1, 1, 1, 1, 1, 1, 1],
-                [1, 1, 1, 1, 1, 1, 0, 0],
-            ],
-        }
-
+          input_ids: [128000, 15339],
+          attention_mask: [1, 1],
+          token_type_ids: [0, 0],
+        };
         compare(model_inputs, expected);
-
-    }, MAX_TEST_EXECUTION_TIME);
-
-    it('should add token type ids if user requests them', async () => {
-        const tokenizer = await AutoTokenizer.from_pretrained('Xenova/llama3-tokenizer-new');
-
-        { // Without text pair
-            const model_inputs = tokenizer(
-                'hello',
-                {
-                    return_tensor: false,
-                    return_token_type_ids: true,
-                }
-            );
-            const expected = {
-                input_ids: [128000, 15339],
-                attention_mask: [1, 1],
-                token_type_ids: [0, 0]
-            }
-            compare(model_inputs, expected);
-        }
-
-        { // With text pair
-            const model_inputs = tokenizer(
-                'hello',
-                {
-                    text_pair: 'world',
-                    return_tensor: false,
-                    return_token_type_ids: true,
-                }
-            );
-            const expected = {
-                input_ids: [128000, 15339, 128000, 14957],
-                attention_mask: [1, 1, 1, 1],
-                token_type_ids: [0, 0, 1, 1]
-            }
-            compare(model_inputs, expected);
-        }
-
-    }, MAX_TEST_EXECUTION_TIME);
+      }
+
+      {
+        // With text pair
+        const model_inputs = tokenizer("hello", {
+          text_pair: "world",
+          return_tensor: false,
+          return_token_type_ids: true,
+        });
+        const expected = {
+          input_ids: [128000, 15339, 128000, 14957],
+          attention_mask: [1, 1, 1, 1],
+          token_type_ids: [0, 0, 1, 1],
+        };
+        compare(model_inputs, expected);
+      }
+    },
+    MAX_EXECUTION_TIME,
+  );
 });
 
-describe('Edge cases', () => {
-    it('should not crash when encoding a very long string', async () => {
-        let tokenizer = await AutoTokenizer.from_pretrained('Xenova/t5-small');
-
-        let text = String.prototype.repeat.call('Hello world! ', 50000);
-        let encoded = tokenizer(text);
-        expect(encoded.input_ids.data.length).toBeGreaterThan(100000);
-    }, MAX_TEST_EXECUTION_TIME);
-
-    it('should not take too long', async () => {
-        let tokenizer = await AutoTokenizer.from_pretrained('Xenova/all-MiniLM-L6-v2');
-
-        let text = String.prototype.repeat.call('a', 50000);
-        let token_ids = tokenizer.encode(text);
-        compare(token_ids, [101, 100, 102])
-    }, 5000); // NOTE: 5 seconds
+describe("Edge cases", () => {
+  it(
+    "should not crash when encoding a very long string",
+    async () => {
+      let tokenizer = await AutoTokenizer.from_pretrained("Xenova/t5-small");
+
+      let text = String.prototype.repeat.call("Hello world! ", 50000);
+      let encoded = tokenizer(text);
+      expect(encoded.input_ids.data.length).toBeGreaterThan(100000);
+    },
+    MAX_EXECUTION_TIME,
+  );
+
+  it("should not take too long", async () => {
+    let tokenizer = await AutoTokenizer.from_pretrained("Xenova/all-MiniLM-L6-v2");
+
+    let text = String.prototype.repeat.call("a", 50000);
+    let token_ids = tokenizer.encode(text);
+    compare(token_ids, [101, 100, 102]);
+  }, 5000); // NOTE: 5 seconds
+
+  it(
+    "Special/added tokens with earlier partial matches",
+    async () => {
+      let tokenizer = await AutoTokenizer.from_pretrained("Xenova/gemini-nano");
+      {
+        let token_ids = tokenizer.encode("\n", { add_special_tokens: false });
+        compare(token_ids, [108]);
+      }
+      {
+        let token_ids = tokenizer.encode("\n\n", { add_special_tokens: false });
+        compare(token_ids, [109]); // Should not be [108, 108]
+      }
+    },
+    MAX_EXECUTION_TIME,
+  );
 });
 
-describe('Extra decoding tests', () => {
-    it('should be able to decode the output of encode', async () => {
-        let tokenizer = await AutoTokenizer.from_pretrained('Xenova/bert-base-uncased');
+describe("Extra decoding tests", () => {
+  it(
+    "should be able to decode the output of encode",
+    async () => {
+      let tokenizer = await AutoTokenizer.from_pretrained("Xenova/bert-base-uncased");
+
+      let text = "hello world!";
+
+      // Ensure all the following outputs are the same:
+      // 1. Tensor of ids: allow decoding of 1D or 2D tensors.
+      let encodedTensor = tokenizer(text);
+      let decoded1 = tokenizer.decode(encodedTensor.input_ids, { skip_special_tokens: true });
+      let decoded2 = tokenizer.batch_decode(encodedTensor.input_ids, { skip_special_tokens: true })[0];
+      expect(decoded1).toEqual(text);
+      expect(decoded2).toEqual(text);
+
+      // 2. List of ids
+      let encodedList = tokenizer(text, { return_tensor: false });
+      let decoded3 = tokenizer.decode(encodedList.input_ids, { skip_special_tokens: true });
+      let decoded4 = tokenizer.batch_decode([encodedList.input_ids], { skip_special_tokens: true })[0];
+      expect(decoded3).toEqual(text);
+      expect(decoded4).toEqual(text);
+    },
+    MAX_EXECUTION_TIME,
+  );
+});
 
-        let text = 'hello world!';
+describe("Chat templates", () => {
+  it("should generate a chat template", async () => {
+    const tokenizer = await AutoTokenizer.from_pretrained("Xenova/mistral-tokenizer-v1");
 
-        // Ensure all the following outputs are the same:
-        // 1. Tensor of ids: allow decoding of 1D or 2D tensors.
-        let encodedTensor = tokenizer(text);
-        let decoded1 = tokenizer.decode(encodedTensor.input_ids, { skip_special_tokens: true });
-        let decoded2 = tokenizer.batch_decode(encodedTensor.input_ids, { skip_special_tokens: true })[0];
-        expect(decoded1).toEqual(text);
-        expect(decoded2).toEqual(text);
+    const chat = [
+      { role: "user", content: "Hello, how are you?" },
+      { role: "assistant", content: "I'm doing great. How can I help you today?" },
+      { role: "user", content: "I'd like to show off how chat templating works!" },
+    ];
 
-        // 2. List of ids
-        let encodedList = tokenizer(text, { return_tensor: false });
-        let decoded3 = tokenizer.decode(encodedList.input_ids, { skip_special_tokens: true });
-        let decoded4 = tokenizer.batch_decode([encodedList.input_ids], { skip_special_tokens: true })[0];
-        expect(decoded3).toEqual(text);
-        expect(decoded4).toEqual(text);
+    const text = tokenizer.apply_chat_template(chat, { tokenize: false });
 
-    }, MAX_TEST_EXECUTION_TIME);
-});
+    expect(text).toEqual("<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]");
 
-describe('Chat templates', () => {
-    it('should generate a chat template', async () => {
-        const tokenizer = await AutoTokenizer.from_pretrained("Xenova/mistral-tokenizer-v1");
+    const input_ids = tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false });
+    compare(input_ids, [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]);
+  });
 
-        const chat = [
-            { "role": "user", "content": "Hello, how are you?" },
-            { "role": "assistant", "content": "I'm doing great. How can I help you today?" },
-            { "role": "user", "content": "I'd like to show off how chat templating works!" },
-        ]
+  it("should support multiple chat templates", async () => {
+    const tokenizer = await AutoTokenizer.from_pretrained("Xenova/c4ai-command-r-v01-tokenizer");
 
-        const text = tokenizer.apply_chat_template(chat, { tokenize: false });
+    // define conversation input:
+    const conversation = [{ role: "user", content: "Whats the biggest penguin in the world?" }];
+    // define documents to ground on:
+    const documents = [
+      { title: "Tall penguins", text: "Emperor penguins are the tallest growing up to 122 cm in height." },
+      { title: "Penguin habitats", text: "Emperor penguins only live in Antarctica." },
+    ];
 
-        expect(text).toEqual("<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]");
+    // render the RAG prompt as a string:
+    const grounded_generation_prompt = tokenizer.apply_chat_template(conversation, {
+      chat_template: "rag",
+      tokenize: false,
+      add_generation_prompt: true,
 
-        const input_ids = tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false });
-        compare(input_ids, [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793])
+      documents,
+      citation_mode: "accurate", // or "fast"
     });
+    expect(grounded_generation_prompt).toEqual("<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble\nThe instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.\n\n" + "# System Preamble\n## Basic Rules\nYou are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.\n\n" + "# User Preamble\n## Task and Context\nYou help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.\n\n## Style Guide\nUnless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.<|END_OF_TURN_TOKEN|>" + "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>Whats the biggest penguin in the world?<|END_OF_TURN_TOKEN|>" + "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>\nDocument: 0\ntitle: Tall penguins\ntext: Emperor penguins are the tallest growing up to 122 cm in height.\n\nDocument: 1\ntitle: Penguin habitats\ntext: Emperor penguins only live in Antarctica.\n</results><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Carefully perform the following instructions, in order, starting each with a new line.\nFirstly, Decide which of the retrieved documents are relevant to the user's last input by writing 'Relevant Documents:' followed by comma-separated list of document numbers. If none are relevant, you should instead write 'None'.\nSecondly, Decide which of the retrieved documents contain facts that should be cited in a good answer to the user's last input by writing 'Cited Documents:' followed a comma-separated list of document numbers. If you dont want to cite any of them, you should instead write 'None'.\nThirdly, Write 'Answer:' followed by a response to the user's last input in high quality natural english. Use the retrieved documents to help you. Do not insert any citations or grounding markup.\nFinally, Write 'Grounded answer:' followed by a response to the user's last input in high quality natural english. Use the symbols <co: doc> and </co: doc> to indicate when a fact comes from a document in the search result, e.g <co: 0>my fact</co: 0> for a fact from document 0.<|END_OF_TURN_TOKEN|>" + "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>");
+  });
 
-    it('should support multiple chat templates', async () => {
-
-        const tokenizer = await AutoTokenizer.from_pretrained("Xenova/c4ai-command-r-v01-tokenizer")
-
-        // define conversation input:
-        const conversation = [
-            { role: "user", content: "Whats the biggest penguin in the world?" }
-        ]
-        // define documents to ground on:
-        const documents = [
-            { title: "Tall penguins", text: "Emperor penguins are the tallest growing up to 122 cm in height." },
-            { title: "Penguin habitats", text: "Emperor penguins only live in Antarctica." }
-        ]
-
-        // render the RAG prompt as a string:
-        const grounded_generation_prompt = tokenizer.apply_chat_template(
-            conversation,
-            {
-                chat_template: "rag",
-                tokenize: false,
-                add_generation_prompt: true,
-
-                documents,
-                citation_mode: "accurate", // or "fast"
-            }
-        )
-        expect(grounded_generation_prompt).toEqual(
-            "<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble\nThe instructions in this section override those in the task description and style guide sections. Don't answer questions that are harmful or immoral.\n\n" +
-            "# System Preamble\n## Basic Rules\nYou are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user's requests, you cite your sources in your answers, according to those instructions.\n\n" +
-            "# User Preamble\n## Task and Context\nYou help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user's needs as best you can, which will be wide-ranging.\n\n## Style Guide\nUnless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.<|END_OF_TURN_TOKEN|>" +
-            "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>Whats the biggest penguin in the world?<|END_OF_TURN_TOKEN|>" +
-            "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>\nDocument: 0\ntitle: Tall penguins\ntext: Emperor penguins are the tallest growing up to 122 cm in height.\n\nDocument: 1\ntitle: Penguin habitats\ntext: Emperor penguins only live in Antarctica.\n</results><|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Carefully perform the following instructions, in order, starting each with a new line.\nFirstly, Decide which of the retrieved documents are relevant to the user's last input by writing 'Relevant Documents:' followed by comma-separated list of document numbers. If none are relevant, you should instead write 'None'.\nSecondly, Decide which of the retrieved documents contain facts that should be cited in a good answer to the user's last input by writing 'Cited Documents:' followed a comma-separated list of document numbers. If you dont want to cite any of them, you should instead write 'None'.\nThirdly, Write 'Answer:' followed by a response to the user's last input in high quality natural english. Use the retrieved documents to help you. Do not insert any citations or grounding markup.\nFinally, Write 'Grounded answer:' followed by a response to the user's last input in high quality natural english. Use the symbols <co: doc> and </co: doc> to indicate when a fact comes from a document in the search result, e.g <co: 0>my fact</co: 0> for a fact from document 0.<|END_OF_TURN_TOKEN|>" +
-            "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
-        );
-    });
+  it("should support automatic chat template detection based on inputs", async () => {
+    const tokenizer = await AutoTokenizer.from_pretrained("Xenova/c4ai-command-r-plus-08-2024-tokenizer");
 
-    it('should support user-defined chat template', async () => {
-        const tokenizer = await AutoTokenizer.from_pretrained("Xenova/llama-tokenizer");
-
-        const chat = [
-            { role: 'user', content: 'Hello, how are you?' },
-            { role: 'assistant', content: "I'm doing great. How can I help you today?" },
-            { role: 'user', content: "I'd like to show off how chat templating works!" },
-        ]
-
-        // https://discuss.huggingface.co/t/issue-with-llama-2-chat-template-and-out-of-date-documentation/61645/3
-        const chat_template = (
-            "{% if messages[0]['role'] == 'system' %}" +
-            "{% set loop_messages = messages[1:] %}" +  // Extract system message if it's present
-            "{% set system_message = messages[0]['content'] %}" +
-            "{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}" +
-            "{% set loop_messages = messages %}" +  // Or use the default system message if the flag is set
-            "{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}" +
-            "{% else %}" +
-            "{% set loop_messages = messages %}" +
-            "{% set system_message = false %}" +
-            "{% endif %}" +
-            "{% if loop_messages|length == 0 and system_message %}" +  // Special handling when only sys message present
-            "{{ bos_token + '[INST] <<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n [/INST]' }}" +
-            "{% endif %}" +
-            "{% for message in loop_messages %}" +  // Loop over all non-system messages
-            "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}" +
-            "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}" +
-            "{% endif %}" +
-            "{% if loop.index0 == 0 and system_message != false %}" +  // Embed system message in first message
-            "{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}" +
-            "{% else %}" +
-            "{% set content = message['content'] %}" +
-            "{% endif %}" +
-            "{% if message['role'] == 'user' %}" +  // After all of that, handle messages/roles in a fairly normal way
-            "{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}" +
-            "{% elif message['role'] == 'system' %}" +
-            "{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}" +
-            "{% elif message['role'] == 'assistant' %}" +
-            "{{ ' '  + content.strip() + ' ' + eos_token }}" +
-            "{% endif %}" +
-            "{% endfor %}"
-        )
-            .replaceAll('USE_DEFAULT_PROMPT', true)
-            .replaceAll('DEFAULT_SYSTEM_MESSAGE', 'You are a helpful, respectful and honest assistant.');
-
-        const text = tokenizer.apply_chat_template(chat, { tokenize: false, return_tensor: false, chat_template });
-
-        expect(text).toEqual("<s>[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant.\n<</SYS>>\n\nHello, how are you? [/INST] I'm doing great. How can I help you today? </s><s>[INST] I'd like to show off how chat templating works! [/INST]");
-
-        // TODO: Add test for token_ids once bug in transformers is fixed.
-    });
+    // Examples adapted from https://huggingface.co/CohereForAI/c4ai-command-r-plus-08-2024
 
-    // Dynamically-generated tests
-    for (const [tokenizerName, tests] of Object.entries(templates)) {
+    {
+      // - default
+      // define conversation input:
+      const messages = [{ role: "user", content: "Hello, how are you?" }];
 
-        it(tokenizerName, async () => {
-            // NOTE: not m(...) here
-            // TODO: update this?
-            const tokenizer = await AutoTokenizer.from_pretrained(tokenizerName);
+      // Format message with the command-r-plus-08-2024 chat template
+      const prompt = tokenizer.apply_chat_template(messages, { tokenize: false, add_generation_prompt: true });
+      expect(prompt).toEqual("<BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>");
+    }
 
-            for (let { messages, add_generation_prompt, tokenize, target } of tests) {
+    {
+      // - tool_use
+      // define conversation input:
+      const conversation = [{ role: "user", content: "Whats the biggest penguin in the world?" }];
 
-                const generated = tokenizer.apply_chat_template(messages, {
-                    tokenize,
-                    add_generation_prompt,
-                    return_tensor: false,
-                });
-                expect(generated).toEqual(target)
-            }
-        });
+      // Define tools available for the model to use:
+      const tools = [
+        {
+          name: "internet_search",
+          description: "Returns a list of relevant document snippets for a textual query retrieved from the internet",
+          parameter_definitions: {
+            query: {
+              description: "Query to search the internet with",
+              type: "str",
+              required: true,
+            },
+          },
+        },
+        {
+          name: "directly_answer",
+          description: "Calls a standard (un-augmented) AI chatbot to generate a response given the conversation history",
+          parameter_definitions: {},
+        },
+      ];
+
+      // render the tool use prompt as a string:
+      const prompt = tokenizer.apply_chat_template(conversation, { tools, tokenize: false, add_generation_prompt: true });
+      expect(prompt).toEqual('<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># Safety Preamble\nThe instructions in this section override those in the task description and style guide sections. Don\'t answer questions that are harmful or immoral.\n\n# System Preamble\n## Basic Rules\nYou are a powerful conversational AI trained by Cohere to help people. You are augmented by a number of tools, and your job is to use and consume the output of these tools to best help the user. You will see a conversation history between yourself and a user, ending with an utterance from the user. You will then see a specific instruction instructing you what kind of response to generate. When you answer the user\'s requests, you cite your sources in your answers, according to those instructions.\n\n# User Preamble\n## Task and Context\nYou help people answer their questions and other requests interactively. You will be asked a very wide array of requests on all kinds of topics. You will be equipped with a wide range of search engines or similar tools to help you, which you use to research your answer. You should focus on serving the user\'s needs as best you can, which will be wide-ranging.\n\n## Style Guide\nUnless the user asks for a different style of answer, you should answer in full sentences, using proper grammar and spelling.\n\n## Available Tools\nHere is a list of tools that you have available to you:\n\n```python\ndef internet_search(query: str) -> List[Dict]:\n    """Returns a list of relevant document snippets for a textual query retrieved from the internet\n\n    Args:\n        query (str): Query to search the internet with\n    """\n    pass\n```\n\n```python\ndef directly_answer() -> List[Dict]:\n    """Calls a standard (un-augmented) AI chatbot to generate a response given the conversation history\n    """\n    pass\n```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Whats the biggest penguin in the world?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>Write \'Action:\' followed by a json-formatted list of actions that you want to perform in order to produce a good response to the user\'s last input. You can use any of the supplied tools any number of times, but you should aim to execute the minimum number of necessary actions for the input. You should use the `directly-answer` tool if calling the other tools is unnecessary. The list of actions you want to call should be formatted as a list of json objects, for example:\n```json\n[\n    {\n        "tool_name": title of the tool in the specification,\n        "parameters": a dict of parameters to input into the tool as they are defined in the specs, or {} if it takes no parameters\n    }\n]```<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>');
+    }
+  });
+
+  it("should support user-defined chat template", async () => {
+    const tokenizer = await AutoTokenizer.from_pretrained("Xenova/llama-tokenizer");
+
+    const chat = [
+      { role: "user", content: "Hello, how are you?" },
+      { role: "assistant", content: "I'm doing great. How can I help you today?" },
+      { role: "user", content: "I'd like to show off how chat templating works!" },
+    ];
+
+    // https://discuss.huggingface.co/t/issue-with-llama-2-chat-template-and-out-of-date-documentation/61645/3
+    const chat_template = (
+      "{% if messages[0]['role'] == 'system' %}" +
+      "{% set loop_messages = messages[1:] %}" + // Extract system message if it's present
+      "{% set system_message = messages[0]['content'] %}" +
+      "{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}" +
+      "{% set loop_messages = messages %}" + // Or use the default system message if the flag is set
+      "{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}" +
+      "{% else %}" +
+      "{% set loop_messages = messages %}" +
+      "{% set system_message = false %}" +
+      "{% endif %}" +
+      "{% if loop_messages|length == 0 and system_message %}" + // Special handling when only sys message present
+      "{{ bos_token + '[INST] <<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n [/INST]' }}" +
+      "{% endif %}" +
+      "{% for message in loop_messages %}" + // Loop over all non-system messages
+      "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}" +
+      "{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}" +
+      "{% endif %}" +
+      "{% if loop.index0 == 0 and system_message != false %}" + // Embed system message in first message
+      "{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}" +
+      "{% else %}" +
+      "{% set content = message['content'] %}" +
+      "{% endif %}" +
+      "{% if message['role'] == 'user' %}" + // After all of that, handle messages/roles in a fairly normal way
+      "{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}" +
+      "{% elif message['role'] == 'system' %}" +
+      "{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}" +
+      "{% elif message['role'] == 'assistant' %}" +
+      "{{ ' '  + content.strip() + ' ' + eos_token }}" +
+      "{% endif %}" +
+      "{% endfor %}"
+    )
+      .replaceAll("USE_DEFAULT_PROMPT", true)
+      .replaceAll("DEFAULT_SYSTEM_MESSAGE", "You are a helpful, respectful and honest assistant.");
+
+    const text = tokenizer.apply_chat_template(chat, { tokenize: false, return_tensor: false, chat_template });
+
+    expect(text).toEqual("<s>[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant.\n<</SYS>>\n\nHello, how are you? [/INST] I'm doing great. How can I help you today? </s><s>[INST] I'd like to show off how chat templating works! [/INST]");
+
+    // TODO: Add test for token_ids once bug in transformers is fixed.
+  });
+
+  it("should throw an error when no chat template is detected", async () => {
+    const tokenizer = await AutoTokenizer.from_pretrained("Xenova/gpt-4o");
+
+    const chat = [{ role: "user", content: "Hello, how are you?" }];
+
+    expect(() => tokenizer.apply_chat_template(chat, { tokenize: false })).toThrowError("tokenizer.chat_template is not set and no template argument was passed");
+  });
+
+  it("should support default parameters", async () => {
+    const tokenizer = await AutoTokenizer.from_pretrained("Xenova/Meta-Llama-3.1-Tokenizer");
+
+    // Example adapted from https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct#tool-use-with-transformers
+    const chat = [
+      { role: "system", content: "You are a bot that responds to weather queries." },
+      { role: "user", content: "Hey, what's the temperature in Paris right now?" },
+    ];
+    const tools = [{ type: "function", function: { name: "get_current_temperature", description: "Get the current temperature at a location.", parameters: { type: "object", properties: { location: { type: "string", description: 'The location to get the temperature for, in the format "City, Country"' } }, required: ["location"] }, return: { type: "number", description: "The current temperature at the specified location in the specified units, as a float." } } }];
+
+    {
+      // `tools` unset (will default to `null`)
+      const text = tokenizer.apply_chat_template(chat, { tokenize: false });
+      expect(text).toEqual("<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a bot that responds to weather queries.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHey, what's the temperature in Paris right now?<|eot_id|>");
+
+      const input_ids = tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false });
+      compare(input_ids, [128000, 128006, 9125, 128007, 271, 38766, 1303, 33025, 2696, 25, 6790, 220, 2366, 18, 198, 15724, 2696, 25, 220, 1627, 10263, 220, 2366, 19, 271, 2675, 527, 264, 11164, 430, 31680, 311, 9282, 20126, 13, 128009, 128006, 882, 128007, 271, 19182, 11, 1148, 596, 279, 9499, 304, 12366, 1314, 1457, 30, 128009]);
+    }
+
+    {
+      // `tools` set
+      const text = tokenizer.apply_chat_template(chat, { tools, tokenize: false });
+      expect(text).toEqual('<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nEnvironment: ipython\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a bot that responds to weather queries.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nGiven the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.\n\nRespond in the format {"name": function name, "parameters": dictionary of argument name and its value}.Do not use variables.\n\n{\n    "type": "function",\n    "function": {\n        "name": "get_current_temperature",\n        "description": "Get the current temperature at a location.",\n        "parameters": {\n            "type": "object",\n            "properties": {\n                "location": {\n                    "type": "string",\n                    "description": "The location to get the temperature for, in the format \\"City, Country\\""\n                }\n            },\n            "required": [\n                "location"\n            ]\n        },\n        "return": {\n            "type": "number",\n            "description": "The current temperature at the specified location in the specified units, as a float."\n        }\n    }\n}\n\nHey, what\'s the temperature in Paris right now?<|eot_id|>');
+
+      const input_ids = tokenizer.apply_chat_template(chat, { tools, tokenize: true, return_tensor: false });
+      compare(input_ids, [128000, 128006, 9125, 128007, 271, 13013, 25, 6125, 27993, 198, 38766, 1303, 33025, 2696, 25, 6790, 220, 2366, 18, 198, 15724, 2696, 25, 220, 1627, 10263, 220, 2366, 19, 271, 2675, 527, 264, 11164, 430, 31680, 311, 9282, 20126, 13, 128009, 128006, 882, 128007, 271, 22818, 279, 2768, 5865, 11, 4587, 6013, 449, 264, 4823, 369, 264, 734, 1650, 449, 1202, 6300, 6105, 430, 1888, 11503, 279, 2728, 10137, 382, 66454, 304, 279, 3645, 5324, 609, 794, 734, 836, 11, 330, 14105, 794, 11240, 315, 5811, 836, 323, 1202, 907, 7966, 5519, 539, 1005, 7482, 382, 517, 262, 330, 1337, 794, 330, 1723, 761, 262, 330, 1723, 794, 341, 286, 330, 609, 794, 330, 456, 11327, 54625, 761, 286, 330, 4789, 794, 330, 1991, 279, 1510, 9499, 520, 264, 3813, 10560, 286, 330, 14105, 794, 341, 310, 330, 1337, 794, 330, 1735, 761, 310, 330, 13495, 794, 341, 394, 330, 2588, 794, 341, 504, 330, 1337, 794, 330, 928, 761, 504, 330, 4789, 794, 330, 791, 3813, 311, 636, 279, 9499, 369, 11, 304, 279, 3645, 7393, 13020, 11, 14438, 2153, 702, 394, 457, 310, 1173, 310, 330, 6413, 794, 2330, 394, 330, 2588, 702, 310, 5243, 286, 1173, 286, 330, 693, 794, 341, 310, 330, 1337, 794, 330, 4174, 761, 310, 330, 4789, 794, 330, 791, 1510, 9499, 520, 279, 5300, 3813, 304, 279, 5300, 8316, 11, 439, 264, 2273, 10246, 286, 457, 262, 457, 633, 19182, 11, 1148, 596, 279, 9499, 304, 12366, 1314, 1457, 30, 128009]);
     }
+  });
 });
diff --git a/tests/utils.test.js b/tests/utils.test.js
deleted file mode 100644
index 10c1bf2d2..000000000
--- a/tests/utils.test.js
+++ /dev/null
@@ -1,57 +0,0 @@
-
-import { AutoProcessor } from '../src/transformers.js';
-import { mel_filter_bank } from '../src/utils/audio.js';
-import { getFile } from '../src/utils/hub.js';
-
-import { MAX_TEST_EXECUTION_TIME } from './init.js';
-
-describe('Utilities', () => {
-
-    describe('Audio utilities', () => {
-
-        it('should calculate MEL filters', async () => {
-
-            // NOTE: Uses official HF implementation as reference:
-            const processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en');
-            const config = processor.feature_extractor.config;
-
-            // True MEL filters
-            const original_mel_filters = config.mel_filters;
-
-            // Calculated MEL filters
-            const calculated_mel_filters = mel_filter_bank(
-                Math.floor(1 + config.n_fft / 2), // num_frequency_bins
-                config.feature_size, // num_mel_filters
-                0.0, // min_frequency
-                8000.0, // max_frequency
-                config.sampling_rate, // sampling_rate
-                "slaney", // norm
-                "slaney", // mel_scale
-            );
-
-            const original = original_mel_filters.flat();
-            const calculated = calculated_mel_filters.flat();
-
-            // Compute max difference
-            const maxdiff = original.reduce((maxdiff, _, i) => {
-                const diff = Math.abs(original[i] - calculated[i]);
-                return Math.max(maxdiff, diff);
-            }, -Infinity);
-            expect(maxdiff).toBeGreaterThanOrEqual(0);
-            expect(maxdiff).toBeLessThan(1e-6);
-
-        }, MAX_TEST_EXECUTION_TIME);
-
-    });
-
-    describe('Hub utilities', () => {
-            
-        it('Read data from blob', async () => {
-            const blob = new Blob(['Hello, world!'], { type: 'text/plain' });
-            const blobUrl = URL.createObjectURL(blob);
-            const data = await getFile(blobUrl);
-            expect(await data.text()).toBe('Hello, world!');
-        });
-
-    });
-});
diff --git a/tests/utils/data_structures.test.js b/tests/utils/data_structures.test.js
new file mode 100644
index 000000000..033a91d00
--- /dev/null
+++ b/tests/utils/data_structures.test.js
@@ -0,0 +1,33 @@
+import { PriorityQueue } from "../../src/utils/data-structures.js";
+
+describe("Priority queue", () => {
+  const EXAMPLE_ARRAY = [2, 5, 3, 1, 4];
+  it("default (max heap)", () => {
+    const queue = new PriorityQueue();
+    queue.extend(EXAMPLE_ARRAY);
+    expect(queue.pop()).toBe(5);
+  });
+
+  it("min heap", () => {
+    const queue = new PriorityQueue((a, b) => a < b);
+    queue.extend(EXAMPLE_ARRAY);
+    expect(queue.pop()).toBe(1);
+  });
+
+  it("heap w/ max size", () => {
+    const queue = new PriorityQueue((a, b) => a > b, 3);
+    queue.extend([1, 2, 3, 4, 5, 4, 3, 2, 1]);
+    expect(queue.pop()).toBe(5);
+
+    // Test with random sizes
+    const sizes = [1, 3, 4, 5, 8, 9, 15, 16, 31, 32, 127, 128];
+    const arr = Array.from({ length: 100 }, (_) => Math.random());
+    const max = Math.max(...arr);
+    for (const size of sizes) {
+      const queue = new PriorityQueue((a, b) => a > b, size);
+      queue.extend(arr);
+      expect(queue.pop()).toBe(max);
+      expect(queue.size).toBeLessThanOrEqual(size);
+    }
+  });
+});
diff --git a/tests/utils/generation.test.js b/tests/utils/generation.test.js
new file mode 100644
index 000000000..dd2229826
--- /dev/null
+++ b/tests/utils/generation.test.js
@@ -0,0 +1,204 @@
+import { AutoTokenizer } from "../../src/tokenizers.js";
+import { AutoModelForSeq2SeqLM, AutoModelForCausalLM } from "../../src/models.js";
+import { TextStreamer } from "../../src/generation/streamers.js";
+import { init, MAX_TEST_EXECUTION_TIME, MAX_MODEL_LOAD_TIME, MAX_MODEL_DISPOSE_TIME } from "../init.js";
+
+// Initialise the testing environment
+init();
+
+// Helper function to generate text
+const generate = async (model, tokenizer, text, options) => {
+  const inputs = tokenizer(text);
+  return await model.generate({
+    ...inputs,
+    ...options,
+  });
+};
+
+describe("Generation parameters", () => {
+  // List all models which will be tested
+  const models = [
+    "hf-internal-testing/tiny-random-T5ForConditionalGeneration", //
+    "hf-internal-testing/tiny-random-LlamaForCausalLM", // decoder-only
+  ];
+  const DUMMY_TEXT = "hello";
+
+  describe(`encoder-decoder (${models[0]})`, () => {
+    const model_id = models[0];
+
+    let model;
+    let tokenizer;
+    beforeAll(async () => {
+      model = await AutoModelForSeq2SeqLM.from_pretrained(model_id);
+      tokenizer = await AutoTokenizer.from_pretrained(model_id);
+    }, MAX_MODEL_LOAD_TIME);
+
+    // NOTE: Since `max_length` defaults to 20, this case also tests that.
+    it(
+      "default",
+      async () => {
+        const outputs = await generate(model, tokenizer, DUMMY_TEXT, {});
+        expect(outputs.dims.at(-1)).toEqual(20);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "max_new_tokens",
+      async () => {
+        const MAX_NEW_TOKENS = 5;
+        const outputs = await generate(model, tokenizer, DUMMY_TEXT, {
+          max_new_tokens: MAX_NEW_TOKENS,
+        });
+        expect(outputs.dims.at(-1)).toEqual(MAX_NEW_TOKENS + 1); // + 1 due to forced BOS token
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "min_length",
+      async () => {
+        const MIN_LENGTH = 3;
+        const MAX_LENGTH = 5;
+        const outputs = await generate(model, tokenizer, DUMMY_TEXT, {
+          eos_token_id: 0,
+          min_length: MIN_LENGTH,
+          max_length: MAX_LENGTH,
+        });
+        expect(outputs.tolist()).toEqual([[0n, 11924n, 11924n, 11924n, 11924n]]);
+        expect(outputs.dims.at(-1)).toBeGreaterThanOrEqual(MIN_LENGTH);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "min_new_tokens",
+      async () => {
+        const MIN_NEW_TOKENS = 2;
+        const MAX_LENGTH = 5;
+        const outputs = await generate(model, tokenizer, DUMMY_TEXT, {
+          eos_token_id: 0,
+          min_new_tokens: MIN_NEW_TOKENS,
+          max_length: MAX_LENGTH,
+        });
+        expect(outputs.tolist()).toEqual([[0n, 11924n, 11924n, 11924n, 11924n]]);
+        expect(outputs.dims.at(-1)).toBeGreaterThanOrEqual(MIN_NEW_TOKENS);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    afterAll(async () => {
+      await model?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+
+  describe(`decoder-only (${models[1]})`, () => {
+    const model_id = models[1];
+
+    let model;
+    let tokenizer;
+    beforeAll(async () => {
+      model = await AutoModelForCausalLM.from_pretrained(model_id);
+      tokenizer = await AutoTokenizer.from_pretrained(model_id);
+    }, MAX_MODEL_LOAD_TIME);
+
+    // NOTE: Since `max_length` defaults to 20, this case also tests that.
+    it(
+      "default",
+      async () => {
+        const outputs = await generate(model, tokenizer, DUMMY_TEXT, {});
+        expect(outputs.dims.at(-1)).toEqual(20);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "max_new_tokens",
+      async () => {
+        const MAX_NEW_TOKENS = 5;
+        const PROMPT_LENGTH = 2; // BOS + DUMMY_TEXT
+        const outputs = await generate(model, tokenizer, DUMMY_TEXT, {
+          max_new_tokens: MAX_NEW_TOKENS,
+        });
+        const expected_length = PROMPT_LENGTH + MAX_NEW_TOKENS;
+        expect(outputs.dims.at(-1)).toEqual(expected_length);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "min_length",
+      async () => {
+        const MIN_LENGTH = 4;
+        const outputs = await generate(model, tokenizer, DUMMY_TEXT, {
+          eos_token_id: [
+            18547, // min_length will suppress this token (generated by default)
+            16012, // stop at this token
+          ],
+          min_length: MIN_LENGTH,
+        });
+        expect(outputs.tolist()).toEqual([[1n, 22172n, 31583n, 18824n, 16621n, 8136n, 16012n]]);
+        expect(outputs.dims.at(-1)).toBeGreaterThanOrEqual(MIN_LENGTH);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "min_new_tokens",
+      async () => {
+        const MIN_NEW_TOKENS = 2;
+        const outputs = await generate(model, tokenizer, DUMMY_TEXT, {
+          eos_token_id: [
+            18547, // min_new_tokens will suppress this token (generated by default)
+            16012, // stop at this token
+          ],
+          min_new_tokens: MIN_NEW_TOKENS,
+        });
+        expect(outputs.tolist()).toEqual([[1n, 22172n, 31583n, 18824n, 16621n, 8136n, 16012n]]);
+        expect(outputs.dims.at(-1)).toBeGreaterThanOrEqual(MIN_NEW_TOKENS);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    afterAll(async () => {
+      await model?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+});
+
+describe("Streamers", () => {
+  describe("decoder-only", () => {
+    const model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM";
+    let model, tokenizer;
+    beforeAll(async () => {
+      model = await AutoModelForCausalLM.from_pretrained(model_id);
+      tokenizer = await AutoTokenizer.from_pretrained(model_id);
+    }, MAX_MODEL_LOAD_TIME);
+
+    it(
+      "batch_size=1",
+      async () => {
+        const target_chunks = ["helloerdingsdelete ", "melytabular ", "Stadiumoba ", "alcune ", "drug"];
+        const chunks = [];
+        const callback_function = (text) => {
+          chunks.push(text);
+        };
+        const streamer = new TextStreamer(tokenizer, { callback_function, skip_special_tokens: true });
+
+        const inputs = tokenizer("hello");
+        const outputs = await model.generate({
+          ...inputs,
+          max_length: 10,
+          streamer,
+        });
+        expect(outputs.tolist()).toEqual([[1n, 22172n, 18547n, 8143n, 22202n, 9456n, 17213n, 15330n, 26591n, 15721n]]);
+        expect(chunks).toEqual(target_chunks);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    afterAll(async () => {
+      await model?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+});
diff --git a/tests/utils/hub.test.js b/tests/utils/hub.test.js
new file mode 100644
index 000000000..19077f009
--- /dev/null
+++ b/tests/utils/hub.test.js
@@ -0,0 +1,40 @@
+import { AutoModel, PreTrainedModel } from "../../src/models.js";
+
+import { MAX_TEST_EXECUTION_TIME } from "../init.js";
+
+// TODO: Set cache folder to a temp directory
+
+describe("Hub", () => {
+  describe("Loading models", () => {
+    it(
+      "should load a model from the local cache",
+      async () => {
+        // 1. Local model exists (doesn't matter about status of remote file since local is tried first)
+        const model = await AutoModel.from_pretrained("hf-internal-testing/tiny-random-T5ForConditionalGeneration");
+        expect(model).toBeInstanceOf(PreTrainedModel);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "should load a model from the remote cache",
+      async () => {
+        // 2. Local model doesn't exist, remote file exists
+        // This tests that fallback functionality is working
+        const model = await AutoModel.from_pretrained("hf-internal-testing/tiny-random-T5ForConditionalGeneration");
+        expect(model).toBeInstanceOf(PreTrainedModel);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "should fail to load a model",
+      async () => {
+        // 3. Local model doesn't exist, remote file doesn't exist
+        // This tests that error handling is working.
+        await expect(AutoModel.from_pretrained("hf-internal-testing/this-model-does-not-exist")).rejects.toBeInstanceOf(Error);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+});
diff --git a/tests/utils/logits_process.test.js b/tests/utils/logits_process.test.js
new file mode 100644
index 000000000..5da188ed4
--- /dev/null
+++ b/tests/utils/logits_process.test.js
@@ -0,0 +1,88 @@
+import {
+  // Pipelines
+  pipeline,
+  TextGenerationPipeline,
+} from "../../src/transformers.js";
+
+import { init } from "../init.js";
+import { compare } from "../test_utils.js";
+init();
+
+const MAX_MODEL_LOAD_TIME = 10_000; // 10 seconds
+const MAX_TEST_EXECUTION_TIME = 10_000; // 10 seconds
+const MAX_MODEL_DISPOSE_TIME = 1_000; // 1 second
+
+const DEFAULT_MODEL_OPTIONS = {
+  dtype: "fp32",
+};
+
+describe("Logits Processors", () => {
+  describe("text-generation", () => {
+    const model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM";
+
+    /** @type {TextGenerationPipeline} */
+    let pipe;
+    beforeAll(async () => {
+      pipe = await pipeline("text-generation", model_id, {
+        // TODO move to config
+        ...DEFAULT_MODEL_OPTIONS,
+      });
+    }, MAX_MODEL_LOAD_TIME);
+
+    describe("bad_word_ids", () => {
+      it(
+        "basic",
+        async () => {
+          const text_input = "hello";
+
+          const generated_text_target = " Bert explicit wed digasset";
+          const text_target = [{ generated_text: text_input + generated_text_target }];
+
+          const output = await pipe(text_input, {
+            max_new_tokens: 5,
+            bad_words_ids: [
+              // default: [22172n, 18547n, 8136n, 16012n, 28064n, 11361n]
+              [18547],
+
+              // block #1: [22172n, 16662n, 6261n, 18916n, 29109n, 799n]
+              [6261, 18916],
+            ],
+          });
+          compare(output, text_target);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+
+      it(
+        "many bad words",
+        async () => {
+          const text_input = "hello";
+
+          const generated_text_target = "erdingsdeletearus)?nor";
+          const text_target = [{ generated_text: text_input + generated_text_target }];
+
+          // Construct long list of bad words
+          const bad_words_ids = [];
+          // default:  [22172n, 18547n, 8136n, 16012n, 28064n, 11361n]
+          for (let i = 0; i < 100000; ++i) {
+            bad_words_ids.push([i * 2]); // block all even numbers
+          }
+          // block #1: [22172n, 18547n, 8143n, 30327n, 20061n, 18193n]
+          bad_words_ids.push([8143, 30327]);
+
+          // block #2: [22172n, 18547n, 8143n, 29485n, 3799n, 29331n]
+          bad_words_ids.push([18547, 8143, 29485]);
+
+          // block #3: [22172n, 18547n, 8143n, 26465n, 6877n, 15459n]
+          const output = await pipe(text_input, { max_new_tokens: 5, bad_words_ids });
+          compare(output, text_target);
+        },
+        MAX_TEST_EXECUTION_TIME,
+      );
+    });
+
+    afterAll(async () => {
+      await pipe?.dispose();
+    }, MAX_MODEL_DISPOSE_TIME);
+  });
+});
diff --git a/tests/utils/maths.test.js b/tests/utils/maths.test.js
new file mode 100644
index 000000000..9a02d4dbd
--- /dev/null
+++ b/tests/utils/maths.test.js
@@ -0,0 +1,192 @@
+import { FFT, medianFilter, bankers_round, log_softmax } from "../../src/utils/maths.js";
+import { compare } from "../test_utils.js";
+
+const fft = (arr, complex = false) => {
+  let output;
+  let fft;
+  if (complex) {
+    fft = new FFT(arr.length / 2);
+    output = new Float64Array(fft.outputBufferSize);
+    fft.transform(output, arr);
+  } else {
+    fft = new FFT(arr.length);
+    output = new Float64Array(fft.outputBufferSize);
+    fft.realTransform(output, arr);
+  }
+  if (!fft.isPowerOfTwo) {
+    output = output.slice(0, complex ? arr.length : 2 * arr.length);
+  }
+  return output;
+};
+
+const FFT_TEST_DATA = {
+  fft_2_real: { complex: false, input: [1.764052345967664, 0.4001572083672233], output: [2.1642095543348874, 0.0, 1.3638951376004407, 0.0] },
+  fft_3_real: { complex: false, input: [0.9787379841057392, 2.240893199201458, 1.8675579901499675], output: [5.087189173457165, 0.0, -1.0754876105699736, -0.3233177751657647, -1.0754876105699736, 0.3233177751657647] },
+  fft_4_real: { complex: false, input: [-0.977277879876411, 0.9500884175255894, -0.1513572082976979, -0.10321885179355784], output: [-0.2817655224420774, 0.0, -0.8259206715787131, -1.0533072693191472, -1.9755046539061405, 0.0, -0.8259206715787131, 1.0533072693191472] },
+  fft_5_real: { complex: false, input: [0.41059850193837233, 0.144043571160878, 1.454273506962975, 0.7610377251469934, 0.12167501649282841], output: [2.891628321702047, 0.0, -1.2995143733619916, -0.4287475285900295, 0.8801964673568988, 0.6461585010760772, 0.8801964673568988, -0.6461585010760772, -1.2995143733619916, 0.4287475285900295] },
+  fft_7_real: { complex: false, input: [0.44386323274542566, 0.33367432737426683, 1.4940790731576061, -0.20515826376580087, 0.31306770165090136, -0.8540957393017248, -2.5529898158340787], output: [-1.0275594839734046, 0.0, -1.1794900746698127, -4.321336254238002, 0.4283827622463364, -2.2006199533718855, 2.8184083690191684, 1.0886333211894632, 2.8184083690191684, -1.0886333211894632, 0.4283827622463364, 2.2006199533718855, -1.1794900746698127, 4.321336254238002] },
+  fft_8_real: { complex: false, input: [0.6536185954403606, 0.8644361988595057, -0.7421650204064419, 2.2697546239876076, -1.4543656745987648, 0.04575851730144607, -0.1871838500258336, 1.5327792143584575], output: [2.9826326049163367, 0.0, 2.1657565005584045, -0.5450316795716824, 0.1286017912738714, 2.8923391221851134, 2.050212039519846, -1.6549940203328992, -6.442824504097697, 0.0, 2.050212039519846, 1.6549940203328992, 0.1286017912738714, -2.8923391221851134, 2.1657565005584045, 0.5450316795716824] },
+  fft_9_real: { complex: false, input: [1.469358769900285, 0.1549474256969163, 0.37816251960217356, -0.8877857476301128, -1.980796468223927, -0.3479121493261526, 0.15634896910398005, 1.2302906807277207, 1.2023798487844113], output: [1.3749938486352948, 0.0, 5.342425446245793, 2.975185521629713, -1.22457133247234, -0.6308804310909996, 0.41938606274358115, 1.5832577539331407, 1.3873773637166027, -0.8933243837257328, 1.3873773637166025, 0.8933243837257323, 0.41938606274358115, -1.5832577539331407, -1.2245713324723406, 0.6308804310909994, 5.342425446245792, -2.975185521629712] },
+  fft_16_real: { complex: false, input: [-0.3873268174079523, -0.30230275057533557, -1.0485529650670926, -1.4200179371789752, -1.7062701906250126, 1.9507753952317897, -0.5096521817516535, -0.4380743016111864, -1.2527953600499262, 0.7774903558319101, -1.6138978475579515, -0.2127402802139687, -0.8954665611936756, 0.386902497859262, -0.510805137568873, -1.180632184122412], output: [-8.363366266001053, 0.0, -1.4796937055641797, 0.20983245542766216, -0.3454288633172973, 2.968909080470098, 2.329350352646858, -0.7633502808629014, -0.5589507973309957, -6.064330201474169, -1.3963011418461777, 1.659405903990098, 2.2686580120389164, -0.3150779061389377, 4.008518665331395, -0.6106258774446865, -7.486167856443222, 0.0, 4.008518665331395, 0.6106258774446865, 2.2686580120389164, 0.3150779061389377, -1.3963011418461777, -1.659405903990098, -0.5589507973309957, 6.064330201474169, 2.329350352646858, 0.7633502808629014, -0.3454288633172973, -2.968909080470098, -1.4796937055641797, -0.20983245542766216] },
+  fft_25_real: { complex: false, input: [-0.028182228338654868, 0.42833187053041766, 0.06651722238316789, 0.3024718977397814, -0.6343220936809636, -0.3627411659871381, -0.672460447775951, -0.3595531615405413, -0.813146282044454, -1.7262826023316769, 0.17742614225375283, -0.4017809362082619, -1.6301983469660446, 0.4627822555257742, -0.9072983643832422, 0.05194539579613895, 0.7290905621775369, 0.12898291075741067, 1.1394006845433007, -1.2348258203536526, 0.402341641177549, -0.6848100909403132, -0.8707971491818818, -0.5788496647644155, -0.31155253212737266], output: [-7.327510303739734, 0.0, 1.377932222483735, 3.044921011687328, 0.06954965931518875, -5.483478593023237, 2.616166182267447, -2.0815154752919764, 0.1551657690860495, 0.5208228796162939, 0.3085012063215258, -2.1386609330432687, -0.566204290441261, 0.012329506565646242, 1.309786704987264, 3.1736358514734673, -1.0557581301013133, -2.4635866620117564, -2.579387724556426, 1.9885603872002973, 3.9572284078024613, -5.498314280945309, 0.6049416192890185, 2.7215390337882552, -2.8864443288170083, 0.6099589663561517, -2.8864443288170074, -0.6099589663561518, 0.604941619289019, -2.721539033788255, 3.9572284078024613, 5.498314280945309, -2.579387724556426, -1.988560387200297, -1.0557581301013133, 2.4635866620117564, 1.3097867049872634, -3.1736358514734673, -0.5662042904412615, -0.012329506565646575, 0.3085012063215258, 2.1386609330432687, 0.15516576908604957, -0.520822879616294, 2.6161661822674476, 2.0815154752919764, 0.06954965931518836, 5.4834785930232375, 1.377932222483735, -3.044921011687328] },
+  fft_27_real: { complex: false, input: [0.05616534222974544, -1.1651498407833565, 0.9008264869541871, 0.46566243973045984, -1.5362436862772237, 1.4882521937955997, 1.8958891760305832, 1.1787795711596507, -0.17992483581235091, -1.0707526215105425, 1.0544517269311366, -0.40317694697317963, 1.2224450703824274, 0.2082749780768603, 0.9766390364837128, 0.3563663971744019, 0.7065731681919482, 0.010500020720820478, 1.7858704939058352, 0.12691209270361992, 0.40198936344470165, 1.8831506970562544, -1.3477590611424464, -1.2704849984857336, 0.9693967081580112, -1.17312340511416, 1.9436211856492926], output: [9.485150752680255, 0.0, -3.3736195725050755, -0.6576671739932465, -1.4668314059671266, -3.7345058492453083, 0.41824676938222627, 1.9663430754071836, 5.709032912166549, 5.528638987648501, 1.8435730384179874, -0.34683544424097734, -4.963689356113413, -0.5177297293983665, -5.8830486994336315, 3.207542182273314, 1.8987452089640713, 2.1505302726369, 6.603715178395637, 5.036393219486801, 1.3865285438489818, 10.275020532162417, 0.1681971482993614, 2.6656111808114664, -3.3300735021919055, -3.3923756439906483, -2.9951195195022233, 4.64643931476696, -2.9951195195022233, -4.64643931476696, -3.3300735021919055, 3.3923756439906487, 0.16819714829936183, -2.665611180811466, 1.3865285438489812, -10.275020532162415, 6.603715178395637, -5.036393219486801, 1.8987452089640697, -2.1505302726368996, -5.8830486994336315, -3.207542182273315, -4.963689356113414, 0.5177297293983658, 1.8435730384179874, 0.34683544424097734, 5.709032912166549, -5.528638987648501, 0.41824676938222605, -1.9663430754071831, -1.4668314059671266, 3.7345058492453087, -3.373619572505076, 0.6576671739932474] },
+  fft_32_real: { complex: false, input: [-0.41361898075974735, -0.7474548114407578, 1.9229420264803847, 1.4805147914344243, 1.8675589604265699, 0.9060446582753853, -0.8612256850547025, 1.9100649530990337, -0.2680033709513804, 0.8024563957963952, 0.947251967773748, -0.1550100930908342, 0.6140793703460803, 0.9222066715665268, 0.37642553115562943, -1.0994007905841945, 0.298238174206056, 1.3263858966870303, -0.6945678597313655, -0.14963454032767076, -0.43515355172163744, 1.8492637284793418, 0.6722947570124355, 0.40746183624111043, -0.7699160744453164, 0.5392491912918173, -0.6743326606573761, 0.03183055827435118, -0.635846078378881, 0.6764332949464997, 0.5765908166149409, -0.20829875557799488], output: [11.014830327385901, 0.0, 1.2409883700304647, -5.276491159392059, -1.534551501998357, -4.8297133119274545, 1.336708303789408, -2.7137782948769944, -4.424467817605524, 0.843485461536611, -6.581416660337224, -0.1795449695375516, -2.514517651560188, -0.05943606215207886, -4.207556144474312, 7.447633642178459, -2.0080404448719507, -4.057057066134013, 1.4811356128841098, 3.432496305430468, 1.3934203835489032, -3.231520399869298, -4.317592600060283, 0.1993606400932625, -0.7034100876395168, 2.317901569810786, 3.7371408499766874, 5.7058540737147405, 6.345803325381664, -2.1851091826937408, 1.6157350284647216, 2.7643998907723595, -5.969395642755025, 0.0, 1.6157350284647212, -2.7643998907723604, 6.345803325381664, 2.1851091826937408, 3.737140849976688, -5.7058540737147405, -0.7034100876395168, -2.317901569810786, -4.317592600060284, -0.19936064009326138, 1.3934203835489032, 3.231520399869298, 1.4811356128841102, -3.432496305430468, -2.0080404448719507, 4.057057066134013, -4.207556144474311, -7.447633642178459, -2.514517651560188, 0.05943606215207886, -6.581416660337223, 0.1795449695375514, -4.424467817605524, -0.843485461536611, 1.3367083037894085, 2.7137782948769944, -1.534551501998357, 4.8297133119274545, 1.2409883700304651, 5.276491159392059] },
+  fft_49_real: { complex: false, input: [0.3960067126616453, -1.0930615087305058, -1.4912575927056055, 0.4393917012645369, 0.16667349537252904, 0.6350314368921064, 2.383144774863942, 0.9444794869904138, -0.9128222254441586, 1.117016288095853, -1.3159074105115212, -0.461584604814709, -0.06824160532463124, 1.7133427216493666, -0.7447548220484399, -0.8264385386590144, -0.0984525244254323, -0.6634782863621074, 1.126635922106507, -1.0799315083634233, -1.1474686524111024, -0.43782004474443403, -0.4980324506923049, 1.9295320538169858, 0.9494208069257608, 0.0875512413851909, -1.225435518830168, 0.8443629764015471, -1.0002153473895647, -1.5447710967776116, 1.1880297923523018, 0.3169426119248496, 0.920858823780819, 0.3187276529430212, 0.8568306119026912, -0.6510255933001469, -1.0342428417844647, 0.681594518281627, -0.8034096641738411, -0.6895497777502005, -0.45553250351734315, 0.01747915902505673, -0.35399391125348395, -1.3749512934180188, -0.6436184028328905, -2.2234031522244266, 0.6252314510271875, -1.6020576556067476, -1.1043833394284506], output: [-7.887557633860811, 0.0, -5.014267769110843, -4.376842071326452, -2.3853231496935923, -10.851256239812793, -3.2717318894892826, -5.326849080561187, -2.7264013581332716, 1.1514977438201093, -7.652841900031138, -3.7746076991956867, 5.309279658846421, 8.814060475350379, -2.6168809007788796, 4.677691230979557, 7.6656963597889805, 3.4858425270229887, 0.2740709971425779, -4.676330054757555, -5.155232202781799, 3.7556835779660136, 5.001888119593458, -3.6198301043995045, -0.0953704694177091, -3.6318583861463845, 2.767000800593773, -2.450144252684209, -1.2540039367717049, 9.279722666945066, 3.815471636849461, -3.3563499317203815, 12.763423104544493, 0.4158128174652558, 3.995279461690892, -6.6439121988180325, 3.2201986963478983, 0.6334206695259166, 2.4912752816319514, -1.9446295959593973, 1.027982187347956, -2.8785858483639863, 1.3490313376869536, 14.471900839049741, 1.0215246227635422, -0.4420763712931004, -1.0162745955034818, -0.3075325636480104, -5.867850815975937, -1.9145668225042987, -5.867850815975937, 1.9145668225042978, -1.0162745955034822, 0.3075325636480101, 1.0215246227635417, 0.4420763712931006, 1.3490313376869536, -14.471900839049741, 1.0279821873479558, 2.878585848363986, 2.491275281631952, 1.9446295959593989, 3.2201986963478975, -0.633420669525917, 3.9952794616908927, 6.6439121988180325, 12.763423104544493, -0.41581281746525595, 3.815471636849461, 3.3563499317203815, -1.2540039367717049, -9.279722666945066, 2.767000800593772, 2.4501442526842085, -0.0953704694177091, 3.631858386146384, 5.001888119593458, 3.6198301043995045, -5.1552322027818, -3.755683577966013, 0.2740709971425781, 4.676330054757556, 7.665696359788981, -3.4858425270229905, -2.6168809007788796, -4.677691230979557, 5.30927965884642, -8.814060475350379, -7.652841900031138, 3.774607699195687, -2.7264013581332716, -1.1514977438201108, -3.2717318894892826, 5.326849080561189, -2.385323149693591, 10.851256239812795, -5.014267769110844, 4.376842071326452] },
+  fft_64_real: { complex: false, input: [0.052165079260974405, -0.7395629963913133, 1.5430145954067358, -1.2928569097234486, 0.26705086934918293, -0.0392828182274956, -1.1680934977411974, 0.5232766605317537, -0.1715463312222481, 0.7717905512136674, 0.8235041539637314, 2.16323594928069, 1.336527949436392, -0.3691818379424436, -0.2393791775759264, 1.0996595958871132, 0.6552637307225978, 0.640131526097592, -1.6169560443108344, -0.024326124398935636, -0.7380309092056887, 0.27992459904323824, -0.09815038964295794, 0.9101789080925919, 0.31721821519130206, 0.7863279621089762, -0.46641909673594306, -0.9444462559182504, -0.41004969320254847, -0.017020413861440594, 0.3791517355550818, 2.259308950690852, -0.04225715166064269, -0.955945000492777, -0.34598177569938643, -0.4635959746460942, 0.4814814737734622, -1.5407970144446248, 0.06326199420033171, 0.1565065379653756, 0.23218103620027578, -0.5973160689653627, -0.237921729736007, -1.4240609089825316, -0.49331988336219407, -0.5428614760167177, 0.4160500462614255, -1.1561824318219127, 0.7811981017099934, 1.4944845444913688, -2.0699850250135325, 0.42625873077810095, 0.6769080350302455, -0.637437025552229, -0.39727181432879766, -0.13288057758695562, -0.2977908794017283, -0.3090129690471222, -1.6760038063299767, 1.15233156478312, 1.079618592036821, -0.8133642592042029, -1.466424327802514, 0.5210648764527586], output: [-1.64663603068023, 0.0, 1.505825929177829, -9.200008132744042, -2.544735819316532, -1.5339227544442826, -7.4360215710438755, -1.7984575917004604, 1.3619465096845937, 4.8456564834488915, 5.070506185450519, 4.849409603121612, 3.777805068935999, 0.17970844009281262, -5.309242543498751, -6.973066979348705, 5.944277107889729, 1.4068283873569305, -7.321235075656323, -8.916520841302049, -5.0314030969029835, 1.309770133246369, 3.721064537404591, -2.7547595090183408, 3.8878856671539617, 0.733440111196902, 5.958504522515222, -9.443751218054468, -3.743781835570924, 1.4088028338848555, 1.4543498973854279, -1.4051068307925165, 10.284222394185964, 6.362595288575114, 5.69300272333881, 2.3840261995408785, -2.344460920124865, 8.555867083200523, -10.10946038896856, -1.5537275998675408, -0.8110283215952525, 0.08930281805022311, -0.3624099573254269, 4.570396206585882, -7.6585567061471576, 9.739963432241709, -4.941569815253301, 0.6205797252271052, -7.291786374000026, -1.6649582074043856, 5.388745981879483, 1.3949577742938146, 1.4070140670266627, 11.226248771172266, 4.869069031848913, 4.140589494166391, 1.0264270218179838, 0.9000492064571382, 4.80696917175708, 0.26025510601075585, 4.725688003441722, 8.559147480463128, -1.4773429342657596, -6.3922359470118755, -4.01533581906691, 0.0, -1.4773429342657578, 6.392235947011875, 4.7256880034417215, -8.559147480463128, 4.80696917175708, -0.26025510601075497, 1.0264270218179838, -0.9000492064571382, 4.869069031848914, -4.140589494166391, 1.4070140670266635, -11.226248771172266, 5.388745981879483, -1.3949577742938146, -7.291786374000026, 1.6649582074043856, -4.941569815253302, -0.6205797252271044, -7.658556706147157, -9.739963432241707, -0.36240995732542824, -4.570396206585883, -0.8110283215952525, -0.08930281805022311, -10.109460388968563, 1.5537275998675408, -2.344460920124864, -8.555867083200523, 5.693002723338812, -2.38402619954088, 10.284222394185964, -6.362595288575114, 1.4543498973854279, 1.4051068307925176, -3.743781835570923, -1.4088028338848546, 5.95850452251522, 9.443751218054468, 3.8878856671539617, -0.733440111196902, 3.72106453740459, 2.754759509018341, -5.031403096902983, -1.309770133246368, -7.321235075656325, 8.916520841302049, 5.944277107889729, -1.4068283873569305, -5.309242543498751, 6.973066979348703, 3.777805068936, -0.1797084400928135, 5.07050618545052, -4.849409603121612, 1.3619465096845937, -4.8456564834488915, -7.4360215710438755, 1.7984575917004595, -2.544735819316533, 1.5339227544442826, 1.505825929177829, 9.200008132744042] },
+  fft_81_real: { complex: false, input: [-0.5757879698130661, 0.14195316332077967, -0.3193284171450952, 0.6915387510701866, 0.6947491436560059, -0.7255973784635843, -1.3833639553950554, -1.582938397335082, 0.6103793791072052, -1.188859257784029, -0.5068163542986875, -0.5963140384505081, -0.05256729626954629, -1.936279805846507, 0.18877859679382855, 0.5238910238342056, 0.08842208704466141, -0.3108861716984717, 0.09740016626878341, 0.3990463456401302, -2.77259275642665, 1.9559123082506942, 0.39009332268792646, -0.65240858238702, -0.3909533751876011, 0.49374177734918845, -0.11610393903436653, -2.0306844677814944, 2.0644928613593194, -0.11054065723247261, 1.0201727117157997, -0.6920498477843912, 1.5363770542457977, 0.28634368889227957, 0.6088438344754508, -1.0452533661469547, 1.2111452896827009, 0.6898181645347884, 1.3018462295649984, -0.6280875596415789, -0.4810271184607877, 2.303916697683942, -1.0600158227215473, -0.13594970067832082, 1.1368913626026953, 0.0977249677148556, 0.5829536797532936, -0.3994490292628752, 0.37005588784751875, -1.3065268517353166, 1.658130679618188, -0.11816404512856976, -0.6801782039968504, 0.6663830820319143, -0.4607197873885533, -1.3342584714027534, -1.3467175057975553, 0.6937731526901325, -0.1595734381462669, -0.13370155966843916, 1.0777438059762627, -1.1268258087567435, -0.7306777528648248, -0.38487980918127546, 0.094351589317074, -0.042171451290578935, -0.2868871923899076, -0.0616264020956474, -0.10730527629117469, -0.7196043885517929, -0.8129929885540773, 0.2745163577239395, -0.8909150829955279, -1.1573552591908536, -0.3122922511256933, -0.1576670161638159, 2.2567234972982093, -0.7047002758562337, 0.9432607249694948, 0.7471883342046318, -1.188944955203736], output: [-6.019981320094997, 0.0, -11.024215089600144, -2.2296309610489784, 7.885780110732945, 6.9546439213354425, 2.1468923343095865, 2.0704996330469774, 5.223342327125926, 3.3335079943959576, 6.670733097535111, 3.059813085584419, -1.291987686750553, 0.4705982966401041, -2.6851539401562734, 0.13762863856647578, -2.2292463073335753, 1.3031715676222848, -8.459551141726745, -1.1728586245314498, -5.19534088481691, -4.1102647711302005, 4.442176173858688, -5.752409619199853, -10.978436211517131, -2.360518158521794, -8.97056708617231, -4.44028490950546, -0.7378544972090442, 0.3833473146759321, 2.41788584599411, 2.094126839887842, -0.9134777730571964, 0.6964431384964922, -1.348881203926696, 4.273283923628751, 2.6990650914215637, -0.04446227794217483, -2.8201900136765587, 10.405176121148552, 4.561086132295523, -4.8957475545660945, -9.626965987488632, 5.703554469280806, 5.118571440405241, -0.47801741518584884, 4.087758685622818, -16.982284140767902, -1.672788156348311, -8.247953480159863, -15.76267685360949, 6.557268666342, 8.45494487748653, -7.023784282842257, 0.9696988388268291, 0.6791846860727092, -3.7628962341106327, -6.731131201638246, 0.3164269710342955, -0.346532027732811, 5.824835704304008, -8.818250625842744, 5.382334715219444, -1.672571325598101, 6.240545283378912, -5.74548463471282, -6.38340050971792, 3.0747648212362937, 11.52437440252928, -1.3394713252215091, 6.27611320575086, 6.861171951341367, -10.764313909223372, -10.425524196300067, -8.17856146704236, 14.338103920769218, 4.86550855683382, 5.452700220702592, -3.278038589307964, -1.8007559828104207, 0.6670476307446598, 9.785124153822995, 0.6670476307446602, -9.785124153823, -3.2780385893079633, 1.8007559828104218, 4.865508556833819, -5.452700220702592, -8.178561467042359, -14.338103920769221, -10.764313909223372, 10.425524196300069, 6.276113205750862, -6.861171951341369, 11.524374402529276, 1.3394713252215045, -6.3834005097179185, -3.0747648212362932, 6.2405452833789115, 5.745484634712819, 5.382334715219443, 1.6725713255980996, 5.824835704304008, 8.818250625842744, 0.31642697103429596, 0.34653202773281055, -3.762896234110635, 6.731131201638245, 0.9696988388268291, -0.6791846860727092, 8.45494487748653, 7.023784282842258, -15.762676853609491, -6.557268666342001, -1.67278815634831, 8.247953480159865, 4.087758685622817, 16.982284140767902, 5.118571440405242, 0.47801741518584784, -9.626965987488635, -5.703554469280807, 4.561086132295526, 4.895747554566093, -2.82019001367656, -10.405176121148555, 2.699065091421563, 0.04446227794217439, -1.3488812039266977, -4.273283923628747, -0.9134777730571959, -0.6964431384964889, 2.417885845994109, -2.0941268398878425, -0.7378544972090446, -0.3833473146759312, -8.97056708617231, 4.4402849095054595, -10.978436211517133, 2.3605181585217934, 4.442176173858688, 5.752409619199854, -5.195340884816912, 4.1102647711302005, -8.459551141726745, 1.1728586245314485, -2.2292463073335775, -1.3031715676222801, -2.6851539401562716, -0.13762863856647434, -1.291987686750553, -0.47059829664010344, 6.670733097535112, -3.0598130855844157, 5.223342327125925, -3.3335079943959554, 2.1468923343095883, -2.0704996330469765, 7.8857801107329415, -6.9546439213354425, -11.024215089600146, 2.229630961048978] },
+  fft_125_real: { complex: false, input: [0.7732529774025997, -1.1838806401933177, -2.659172237996741, 0.6063195243593807, -1.7558905834377194, 0.45093446180591484, -0.6840108977372166, 1.6595507961898721, 1.068509399316009, -0.45338580385138766, -0.6878376110286823, -1.2140774030941206, -0.4409226322925914, -0.2803554951845091, -0.3646935443916854, 0.15670385527236397, 0.5785214977288784, 0.349654456993174, -0.764143923906443, -1.4377914738015785, 1.3645318481024713, -0.6894491845499376, -0.6522935999350191, -0.5211893123011109, -1.8430695501566485, -0.4779740040404867, -0.47965581400794766, 0.6203582983435125, 0.698457149107336, 0.00377088908626934, 0.9318483741143037, 0.339964983801262, -0.01568211160255477, 0.16092816829822298, -0.19065349358139935, -0.3948495140334503, -0.26773353689396645, -1.1280113314700069, 0.280441705316296, -0.9931236109295807, 0.8416312640736364, -0.24945858016094885, 0.04949498165009074, 0.49383677628095635, 0.6433144650629279, -1.5706234086334527, -0.20690367616397173, 0.8801789120807822, -1.6981058194322545, 0.3872804753950634, -2.2555642294021894, -1.0225068436356035, 0.0386305518401881, -1.6567151023219537, -0.9855107376841507, -1.4718350074635869, 1.6481349322075596, 0.16422775548733395, 0.5672902778526694, -0.2226751005151545, -0.35343174875719907, -1.6164741886510325, -0.2918373627478628, -0.7614922118116233, 0.8579239242923363, 1.1411018666575734, 1.4665787155741776, 0.852551939461232, -0.5986539369229861, -1.1158969859603944, 0.7666631816450861, 0.3562928174722889, -1.7685384506770307, 0.35548179274376907, 0.8145198224878664, 0.05892558918162996, -0.18505367100934153, -0.8076484876163557, -1.4465346995633879, 0.8002979493400275, -0.3091144447717088, -0.23346666154369272, 1.7327211869191332, 0.6845011068591904, 0.3708250012811021, 0.14206180518723566, 1.5199948607657727, 1.7195893074161945, 0.9295051114795281, 0.5822245913979243, -2.0946030712061448, 0.12372191423350658, -0.130106954193704, 0.09395322938556872, 0.9430460873225178, -2.7396771671895563, -0.5693120534701851, 0.26990435494076137, -0.4668455460527625, -1.4169061131262595, 0.8689634868967954, 0.27687190584612803, -0.9711045704444846, 0.3148172045158238, 0.8215857120497958, 0.005292646299360854, 0.8005648034309968, 0.07826017516166135, -0.39522898265435435, -1.159420516399913, -0.08593076697161273, 0.19429293804577166, 0.8758327615873309, -0.11510746848722672, 0.4574156062209908, -0.9646120137337284, -0.7826291558275251, -0.11038929902688775, -1.0546284639850139, 0.8202478373246812, 0.4631303293186071, 0.2790957643924534, 0.33890412521594454, 2.0210435614847975, -0.46886418796679563], output: [-12.976781205595502, 0.0, -0.5336306740788994, 9.664313658528595, -2.250865418762321, -6.399236976457672, 4.984600678216093, 10.085773094158728, 4.054865444587319, 1.1220280117249106, -12.381959140729, -4.245941970428558, 11.300429673882508, -3.867369304887277, -1.924145132324413, 15.625845728307613, -3.683967210179358, 9.246098611130797, 6.678100602051274, -5.8877245671287906, -11.142505237356518, 1.2462822325294702, -4.950012182132614, 15.368429468743631, -2.7544743905540385, 7.397618288416753, 4.896491435868608, 21.15703724867305, -10.418689778983975, 1.697253164520248, 2.2518122169760852, 14.869916845413385, -5.553898084870594, -3.616357859997289, 8.688187732450988, 9.51306110875087, 1.8459884086796337, -6.750879404311422, 10.389191729731694, -8.563754760867933, 6.323482750892878, -0.814569185033468, -6.339262858307836, 12.395809458636048, -5.5325241155089815, -4.087114562924723, 5.264130311599194, 5.174155710626681, 0.07817803409444535, 4.502699566709124, -6.841988594388185, -4.209615030635216, -3.014915877040603, 0.7642671251818629, 4.979942269363006, -0.7954973970411463, -9.213201050170268, 20.281572828473664, 2.195700817994501, -2.8150917630751158, 3.66256874231074, 4.947718687255637, 2.417023995325397, 6.311630702081038, 8.910067289733103, -1.3549619066609422, -7.604782781699578, 3.2671105424317712, -0.9261608630826195, -10.26033603214804, 2.477431685753063, -0.5368669007709066, 6.588511626180253, 3.6391310126388206, -0.7696459233665145, 10.849962526266822, 0.8214214627634562, -9.665598459357405, -4.208875535725593, -8.955547690168373, 2.3998423294426914, -6.900645072282014, 10.561899982588944, -6.1497412675557115, -10.104315792100294, 11.339404667032188, 4.041967950367949, -10.290041543502431, 9.533068142456843, -5.3782354906044025, 4.12027245501964, -0.5914284618824925, 12.365588431711828, -2.701635423848076, -6.366846689539269, -5.370727636437312, 6.689816472907384, 7.334498752981377, 13.69756331474747, 3.7921571289587113, -0.2721490559996148, 0.2084376847247582, 9.275694421318246, -16.06448352473527, 1.3029488980606758, -8.03640349313321, 0.20226177706452786, 6.0912757829407465, -4.452961538391443, -15.072885312247177, -0.8419482730978101, -7.04697219502747, 12.864547051022127, -6.206422060382598, -6.149387177052475, -15.825612767912974, -0.5432046258303531, -0.24636987696629387, 0.5819230336105994, -10.272301011724789, 3.8285784744531433, 1.1829320961516245, -5.404044842034781, 0.4066057531145235, -1.2770351094581365, 7.590779935818785, -1.2770351094581383, -7.590779935818781, -5.404044842034781, -0.4066057531145224, 3.828578474453144, -1.1829320961516248, 0.5819230336105993, 10.272301011724787, -0.54320462583035, 0.2463698769662941, -6.149387177052476, 15.825612767912975, 12.864547051022129, 6.206422060382598, -0.8419482730978074, 7.04697219502747, -4.452961538391442, 15.072885312247177, 0.20226177706453008, -6.091275782940745, 1.3029488980606758, 8.03640349313321, 9.27569442131825, 16.06448352473527, -0.2721490559996148, -0.2084376847247582, 13.69756331474747, -3.792157128958717, 6.689816472907385, -7.334498752981374, -6.366846689539267, 5.370727636437312, 12.365588431711828, 2.7016354238480744, 4.120272455019638, 0.5914284618824928, 9.533068142456843, 5.378235490604402, 4.041967950367946, 10.290041543502431, -10.104315792100294, -11.339404667032191, 10.561899982588944, 6.1497412675557115, 2.399842329442692, 6.900645072282014, -4.208875535725592, 8.955547690168373, 0.8214214627634587, 9.665598459357408, -0.7696459233665118, -10.849962526266822, 6.588511626180252, -3.6391310126388214, 2.477431685753063, 0.536866900770905, -0.9261608630826198, 10.260336032148041, -7.604782781699578, -3.2671105424317712, 8.9100672897331, 1.354961906660943, 2.4170239953253976, -6.311630702081036, 3.662568742310737, -4.947718687255637, 2.1957008179945006, 2.815091763075113, -9.213201050170266, -20.281572828473664, 4.979942269363008, 0.7954973970411481, -3.0149158770406013, -0.7642671251818633, -6.841988594388185, 4.209615030635216, 0.07817803409444402, -4.502699566709122, 5.264130311599196, -5.174155710626682, -5.532524115508983, 4.087114562924725, -6.339262858307835, -12.395809458636048, 6.323482750892879, 0.8145691850334673, 10.389191729731692, 8.563754760867932, 1.845988408679628, 6.75087940431142, 8.68818773245099, -9.51306110875087, -5.553898084870594, 3.6163578599972883, 2.251812216976085, -14.869916845413385, -10.418689778983971, -1.6972531645202498, 4.896491435868606, -21.15703724867305, -2.7544743905540354, -7.397618288416755, -4.950012182132614, -15.368429468743635, -11.142505237356517, -1.2462822325294698, 6.678100602051274, 5.8877245671287906, -3.683967210179361, -9.246098611130797, -1.9241451323244125, -15.625845728307619, 11.30042967388251, 3.8673693048872773, -12.381959140729, 4.245941970428559, 4.054865444587321, -1.122028011724911, 4.984600678216097, -10.085773094158727, -2.250865418762321, 6.399236976457672, -0.5336306740788972, -9.664313658528593] },
+  fft_128_real: { complex: false, input: [-2.201441285500558, 0.1993001968964652, -0.050603540961665895, -0.5175190425104033, -0.9788298593587699, -0.43918952180214793, 0.18133842921782128, -0.5028167006425383, 2.4124536795437486, -0.960504381633148, -0.7931173627076716, -2.2886200400145285, 0.251484415021537, -2.01640662779976, -0.5394546333745014, -0.27567053456055696, -0.7097279658468882, 1.738872677454511, 0.9943943913154989, 1.3191368763015756, -0.8824188185499185, 1.1285940645145685, 0.4960009463439622, 0.7714059486768455, 1.0294388287827672, -0.9087632459590531, -0.4243176209779015, 0.8625960113284511, -2.655619092974933, 1.5133280825732052, 0.553132064207584, -0.045703960660234855, 0.2205076557571733, -1.0299352833089765, -0.34994336458910474, 1.1002843382203737, 1.2980219723262212, 2.6962240525635797, -0.07392466628041514, -0.6585529668050037, -0.5142339659399888, -1.0180418752873648, -0.07785475594085076, 0.38273243001226814, -0.03424228053195387, 1.0963468456657985, -0.23421580134453654, -0.3474506524985633, -0.5812684768603252, -1.6326345262344952, -1.567767724308454, -1.1791579306376878, 1.3014280716647608, 0.8952602728899299, 1.3749640663929898, -1.3322116545945017, -1.9686246897860202, -0.6600563201340829, 0.175818953296028, 0.4986902749098275, 1.0479721559680528, 0.2842796708072146, 1.7426687806556311, -0.22260568094832048, -0.9130792180417964, -1.6812182154944335, -0.8889713580954499, 0.242117960985123, -0.8887202573536308, 0.9367424635352571, 1.412327706037443, -2.369586905226603, 0.8640523004976479, -2.2396040586617367, 0.4014990550902875, 1.2248705641936597, 0.06485610634357618, -1.2796891732042395, -0.5854312042777726, -0.2616454457109007, -0.18224478378994294, -0.20289684076666706, -0.1098827793093138, 0.2134800489101689, -1.2085736537332212, -0.2420198298702195, 1.5182611703557054, -0.38464542314251776, -0.4438360931551978, 1.0781973037142378, -2.5591846663440965, 1.1813786012882859, -0.6319037580051673, 0.16392857245258663, 0.09632135592119682, 0.9424681192203938, -0.2675947462353477, -0.6780257815644504, 1.2978457906510987, -2.36417381714118, 0.02033418170524325, -1.3479254226291204, -0.761573388256559, 2.011256681463137, -0.044595426455857026, 0.19506969715138117, -1.7815628557055914, -0.7290446587946957, 0.19655740072878491, 0.3547576931132181, 0.6168865543932788, 0.008627898917576322, 0.5270042084546597, 0.453781912635684, -1.8297404110045314, 0.03700572191014953, 0.7679024077327037, 0.5898798207345195, -0.3638588099707899, -0.8056265075393678, -1.1183119243216322, -0.13105401154141233, 1.133079879559722, -1.951804101481602, -0.659891729729498, -1.139802455426774, 0.7849575212405001, -0.5543096265713009], output: [-17.373351338172824, 0.0, -2.9147793143550498, -7.299382227996462, -12.507738907636103, -1.0091552608283472, -8.14898282968933, 5.307325780993493, -0.034750314603287924, 4.577509972521205, -7.468519860234688, -2.7826003571131217, 5.138209694069926, 2.2706200801626815, 10.580893461437023, -11.281416288223713, -2.4076277186241555, -8.076888559746063, -7.7255720166232775, -7.240220313440044, -11.221660986331635, 5.410676842717731, -0.7666763204502667, 0.23941154418990696, -1.1567394707746503, 8.29598249679744, -5.425512551243248, 4.4002233124645125, 4.01519747432619, 9.515412040403152, 17.306448507916105, -15.673102695422665, -9.901937950321809, 18.738864037440322, 19.21931801890001, 4.139374296179019, -15.995833331174268, -14.354578092775146, -1.028728623054629, -10.403106776550125, -2.312215325199424, 0.9570154846116288, 12.655685692773659, -2.588769695009625, 14.96829370833483, -0.28008251578319854, -19.11873843663264, -1.1020798388070263, 2.4267878006424475, 2.4199460094928953, -7.0321828345356066, -2.542686317569949, -7.804615781068486, 7.887570592086873, -6.609647506979987, 10.661665300030329, -13.524024198497745, 11.81604749808395, -1.07822493702058, -1.484839411655623, -11.897938634419027, -8.105846942980566, 5.44601793359646, -6.0339961403022055, -6.671236362873311, -1.7118899285267464, -0.9493422927794772, -2.6019479213685455, 0.783595826718674, 1.6178298328406964, -1.9230298563197574, -5.115726520029282, -7.036766891552735, -8.977589519991337, -6.579537548860962, -0.34069352829828503, -9.041064010465133, -6.282136934564469, 3.7117329189048838, -5.394231282736409, -11.177409462832511, 2.636948493512454, -6.698531905653285, -6.518503941570649, 8.905271814948328, 12.87201877019853, 7.090676972577299, 8.341357540310314, -4.648696854100845, 6.957035567575479, -2.241879985890022, -15.193281739525478, 1.2271924686489228, 4.616886618458665, -14.338441214676216, 14.862058456971615, 8.10221962201912, -6.558552885146747, -4.313458061854922, -6.800570539708076, -9.46963628765032, -14.706255169181524, -1.0508252086336558, 14.12155372134561, 10.30295927948794, 1.482579183504495, -9.087461443620512, 7.457473719466465, -10.18793804336269, -25.247790452763812, 4.02147223954335, -15.36278457408435, -6.138499904099753, -0.4601627652745437, -8.437677953855983, -1.9355996291504027, -16.216877073460964, -6.537035151324876, 4.1897004524761075, -2.2519743698026677, 0.6872691674164905, 12.722533287194196, 0.2882873356987652, 5.116890429328395, 20.226607459494886, 1.290674771102844, -2.800068989539933, -1.5108108590900127, 3.1832454973443687, 0.0, -2.8000689895399358, 1.5108108590900131, 20.226607459494886, -1.290674771102844, 0.2882873356987643, -5.116890429328395, 0.6872691674164899, -12.722533287194196, 4.189700452476105, 2.2519743698026673, -16.216877073460967, 6.537035151324876, -8.437677953855987, 1.9355996291504054, -6.138499904099753, 0.4601627652745437, 4.021472239543354, 15.362784574084351, -10.187938043362688, 25.247790452763812, -9.087461443620512, -7.457473719466468, 10.30295927948794, -1.4825791835044928, -1.0508252086336576, -14.121553721345606, -9.46963628765032, 14.706255169181524, -4.31345806185492, 6.800570539708074, 8.10221962201912, 6.558552885146747, -14.338441214676216, -14.862058456971612, 1.2271924686489228, -4.616886618458665, -2.241879985890021, 15.193281739525482, -4.648696854100845, -6.957035567575479, 7.090676972577298, -8.341357540310312, 8.905271814948328, -12.872018770198533, -6.698531905653283, 6.518503941570645, -11.177409462832511, -2.636948493512454, 3.7117329189048833, 5.394231282736407, -9.041064010465133, 6.282136934564471, -6.579537548860961, 0.3406935282982859, -7.036766891552734, 8.977589519991337, -1.9230298563197554, 5.115726520029284, 0.783595826718674, -1.6178298328406973, -0.9493422927794772, 2.601947921368547, -6.671236362873311, 1.7118899285267464, 5.446017933596458, 6.033996140302204, -11.897938634419027, 8.105846942980568, -1.0782249370205794, 1.4848394116556225, -13.524024198497745, -11.81604749808395, -6.609647506979986, -10.661665300030329, -7.804615781068486, -7.887570592086873, -7.032182834535609, 2.5426863175699514, 2.4267878006424475, -2.4199460094928953, -19.11873843663264, 1.1020798388070299, 14.96829370833483, 0.2800825157832003, 12.655685692773659, 2.588769695009626, -2.312215325199422, -0.9570154846116292, -1.02872862305463, 10.403106776550121, -15.995833331174268, 14.354578092775146, 19.21931801890001, -4.139374296179023, -9.901937950321809, -18.738864037440322, 17.3064485079161, 15.673102695422664, 4.01519747432619, -9.515412040403152, -5.425512551243248, -4.400223312464516, -1.1567394707746494, -8.295982496797441, -0.7666763204502649, -0.23941154418990784, -11.221660986331633, -5.410676842717731, -7.725572016623278, 7.240220313440043, -2.4076277186241555, 8.076888559746063, 10.580893461437023, 11.281416288223713, 5.138209694069921, -2.2706200801626824, -7.468519860234687, 2.782600357113122, -0.0347503146032867, -4.577509972521205, -8.148982829689329, -5.307325780993491, -12.507738907636105, 1.0091552608283463, -2.9147793143550498, 7.299382227996464] },
+  fft_2_complex: { complex: true, input: [0.3076825439929962, -1.695198893547058, -1.7243221998214722, 0.2783026695251465], output: [-1.416639655828476, -1.4168962240219116, 2.0320047438144684, -1.9735015630722046] },
+  fft_3_complex: { complex: true, input: [0.9383012056350708, -0.6457691788673401, -0.8741029500961304, -0.8208730816841125, -1.1462081670761108, 1.1436293125152588], output: [-1.0820099115371704, -0.32301294803619385, 0.24714778504918455, -1.042797324689853, 3.649765743393198, -0.5714972638759733] },
+  fft_4_complex: { complex: true, input: [1.4125794172286987, 0.6605323553085327, 0.28041958808898926, -0.03313862159848213, -1.6822501420974731, -0.03843749687075615, -0.424235999584198, -0.8116567134857178], output: [-0.41348713636398315, -0.22270047664642334, 3.8733476512134075, -0.005685735493898392, -0.12585431337356567, 1.4668901935219765, 2.3163114674389362, 1.4036254398524761] },
+  fft_5_complex: { complex: true, input: [0.17418311536312103, -1.2342511415481567, 1.477837324142456, 1.1353919506072998, -0.21928854286670685, 0.8536889553070068, -0.5614511966705322, 0.6619521379470825, -0.8825250864028931, 0.19404278695583344], output: [-0.011244386434555054, 1.6108246892690659, 1.9977527610189005, -4.495568829702767, -0.17773784354010536, -2.9033977754116895, -0.9196550557167773, -0.7794573885094949, -0.018199898511857437, 0.39634359661410157] },
+  fft_7_complex: { complex: true, input: [1.2519853115081787, -0.8415557742118835, 1.4540354013442993, -2.404756546020508, -1.4876911640167236, -1.494455099105835, -0.5286277532577515, -2.204728603363037, -0.5613620281219482, -0.7104699611663818, 0.4594193398952484, -0.6865041255950928, 0.4614850580692291, 0.4871855676174164], output: [1.0492441654205322, -7.855284541845322, -0.039888223328758965, 2.182752912893351, -0.22803454984866334, -2.054368234099608, -2.9522900484683143, -1.8098744525563912, 1.2074691685152796, 2.159880155954229, 2.3732051464911823, 1.5194156322450416, 7.354191521775993, -0.03341189207448503] },
+  fft_8_complex: { complex: true, input: [0.970332682132721, 0.02602463774383068, 0.019165080040693283, -0.39630618691444397, 0.030557435005903244, 0.20711614191532135, 2.1260392665863037, -0.4882599413394928, 0.38254037499427795, -0.11685442924499512, 0.10177633911371231, -0.9286617040634155, 1.243769645690918, 1.106126070022583, 0.0841875895857811, -0.24541817605495453], output: [4.9583684131503105, -0.8362335879355669, -1.6087226322558026, 0.5188463473486555, -0.5127437971532345, 0.6852134335786104, 3.193741520912889, -3.0038725843536103, 0.2960318624973297, 3.2810584288090467, 0.9862873903181653, 2.1933362079990255, 0.6698357500135899, -3.493357440456748, -0.22013705042147969, 0.8632062969612324] },
+  fft_9_complex: { complex: true, input: [-0.06354460120201111, -0.2525795102119446, -1.3290656805038452, 1.239378571510315, -0.2628045976161957, -0.6161656975746155, 1.4840424060821533, -0.8743874430656433, 0.5852842926979065, -1.1418819427490234, -0.6792443990707397, -0.5788629055023193, -0.40828627347946167, 2.353774309158325, -0.34253063797950745, 1.3391598463058472, -0.5310598015785217, 0.7086358070373535], output: [-1.5472092926502228, 2.1770710349082947, -6.615882017596261, 0.6056449746473178, 2.5839119815832605, 0.5586794569181851, 3.9573314649336995, 0.4166996852974395, -0.136326996045127, -2.6370722903002877, 1.4693146501411956, -2.4984601837994824, 0.6265124219185647, 1.0866513474364776, -3.439069243722412, -5.861805640552551, 2.529515620619202, 3.879376023537104] },
+  fft_16_complex: { complex: true, input: [1.0133312940597534, 0.3585434854030609, -0.6402965188026428, -1.3225007057189941, -0.4752223789691925, -0.5362467765808105, 1.2483383417129517, 0.05567394196987152, -0.041222695261240005, -0.5106480717658997, -0.34942546486854553, -1.4202176332473755, 1.2869471311569214, 1.1659399271011353, -1.326190710067749, 0.7442878484725952, 1.092625379562378, -0.6453229188919067, 0.3833552300930023, 0.4733515679836273, -0.6908642053604126, -1.0344504117965698, -0.07214371114969254, 1.7793974876403809, 0.4729478657245636, -0.7470325827598572, -0.46451860666275024, 1.7667423486709595, 0.8151687383651733, 1.0275626182556152, -0.7450098395347595, 0.052204303443431854], output: [1.5078198499977589, 1.207284428179264, -4.4806762050351825, -1.1643592216953969, -4.103445578388806, -0.030867462785639788, -0.10790451872168705, -0.5227224508100916, -1.532535444945097, -1.991386003792286, 2.03052393483773, 6.085669694760458, 7.229743305508964, -3.4075612910619277, -5.958691786804708, 2.256651932037089, 5.43960240855813, -3.050593890249729, 5.332886654634274, 3.73714542609058, -0.0764908827618398, 8.509074812562861, 0.7390405363915563, 0.02108386955870034, 4.735840562731028, -2.3431448861956596, -2.2543726824714905, -2.586308038032556, 3.6471191682769133, -1.1870411745676497, 4.064841383148511, 0.2037700224509582] },
+  fft_25_complex: { complex: true, input: [-1.9739670753479004, -0.837864100933075, -1.1114044189453125, -0.5064931511878967, -0.32460978627204895, -0.5005347728729248, -0.3506036102771759, 0.0742226392030716, -0.37107688188552856, 0.5423977971076965, -1.9017586708068848, 0.6719839572906494, 2.0592153072357178, 0.39552798867225647, 0.16856394708156586, 0.19291874766349792, 0.04277687519788742, -0.04168238118290901, -1.3544977903366089, 0.5823065638542175, 0.5274385809898376, 0.40750083327293396, -0.8166235089302063, 0.9344403743743896, -0.633162260055542, 1.6156861782073975, -0.017774872481822968, -0.4030669629573822, 1.5376009941101074, -0.5414798855781555, 1.5840349197387695, 0.37068140506744385, -1.5073336362838745, -0.4143368899822235, 0.0665762796998024, 1.2245029211044312, 0.3779613673686981, -1.7612340450286865, 0.9021579623222351, 0.02208423614501953, 0.017875323072075844, 0.09636616706848145, -0.1060614064335823, -0.9343460202217102, 0.9497752785682678, -2.030526876449585, 1.145573616027832, -0.636160671710968, 1.1762216091156006, -0.4195115566253662], output: [0.08689814247190952, -1.8966175056993961, 4.5468912855738495, -1.8818154119554684, -0.5200552972284029, 1.2734341399283204, 0.4632869234518082, 5.931840244626491, -2.9589503069493865, -0.5547667441139432, -1.5273151665934335, 6.215078295347433, -15.640798930847374, 2.4457132590529254, 2.0174810051387153, 0.018505660090085918, -0.195679780600583, -1.5733085555890094, 2.608942923248818, -1.8646958677420233, -5.164085446696965, 1.34215964208527, -5.615886183114355, -0.2185132207952143, 5.847780025346529, 4.8320068942200045, -0.10823882684491926, -0.9458843955572025, -9.968247687952289, -5.8960346192185416, 1.8915989122741381, -0.7760332258366917, -5.750397961307298, 3.3450370182735623, -0.40650253504753975, -3.1697133057132096, 3.105915528359142, -7.099303168364118, -6.373188277089521, 1.2862869596061528, -4.01898105322616, -1.3412458970644465, 3.983941673212713, 2.3932716249692954, -4.4090529271307455, -8.431989160831321, -3.768856482573692, -3.025597080828077, -7.475676439572469, -11.354418102217751] },
+  fft_27_complex: { complex: true, input: [0.7116051316261292, 1.037112832069397, 0.6106399893760681, 0.7972601652145386, -0.2644272744655609, -1.5348200798034668, 0.42119401693344116, -0.22403796017169952, -1.1107473373413086, -0.7211647629737854, -0.6500771641731262, 1.2391200065612793, -0.15565899014472961, -0.5251198410987854, 1.8477299213409424, 0.3672321140766144, -1.4575940370559692, -0.28738635778427124, 0.4544501006603241, 0.10830547660589218, 0.5361469388008118, -0.3605248034000397, -1.8324573040008545, 0.07127898186445236, 0.04289998486638069, 0.9799603819847107, 0.5673006772994995, 0.6624241471290588, -0.3812290132045746, -1.7817859649658203, 0.15860582888126373, -1.0848448276519775, -0.7432076334953308, 0.5504264831542969, -2.300358772277832, -0.41666755080223083, -3.017878293991089, 1.61266028881073, -0.419252872467041, -0.20492435991764069, 0.36734986305236816, -1.0211094617843628, -0.3779532313346863, 0.3455152213573456, 0.3696709871292114, -0.5317375659942627, 0.7667483687400818, -0.7377064824104309, 1.1323468685150146, -0.4188673496246338, 0.1713690608739853, 0.8521258234977722, 1.9920309782028198, 0.2819546163082123], output: [-2.560753207653761, -0.9453208297491074, 9.347935323529054, -0.9253007594026554, 3.3062934251957525, 9.624685892964905, -3.747491429172474, 6.944443936382198, 0.2846532016836547, -2.186295990939281, 1.6811782376183848, 6.6863808498799235, -6.517827112663515, 0.14943438287348965, 5.055696157497922, 1.2377621172326654, 5.652717825977259, 5.273448677870682, 5.183011380415665, -1.6221043494072749, -7.350588286919974, 3.2159710253980176, 6.887033244512073, 2.2722794132317587, 3.079426655802572, 0.7414325238817026, -3.3293146897618247, -5.438193278707893, -7.072286065035183, -1.106348191005082, -6.900230177764772, 4.314765981558758, -0.8251297022533834, -3.636347937894679, -0.8573794050807142, -0.5671438615765814, -4.513423924725758, 8.05947784599932, 2.720475531609292, -11.224192327686906, -1.8276595977578185, -0.7655023451245344, 1.4131230144639861, 4.705230182137967, -4.606202193101245, 1.2945168184514193, 14.000104083285905, 4.499731058762659, -2.1022427540436652, 2.4753477036971208, 4.917062359267286, -7.812371522652364, 7.895156658980767, 2.736259449697491] },
+  fft_32_complex: { complex: true, input: [0.8450640439987183, 1.3659120798110962, -1.0280054807662964, -0.08084108680486679, 0.9755204916000366, 0.5328992605209351, -0.06296933442354202, -0.08816881477832794, -0.6170272827148438, -1.8309184312820435, -1.3914250135421753, -0.5434551239013672, 1.1874107122421265, 0.6239242553710938, 0.13674135506153107, 0.8248231410980225, 0.44601964950561523, -0.5365520715713501, -1.2611466646194458, -1.2415540218353271, 0.03862550109624863, 0.7438274025917053, -0.964236855506897, 1.1258167028427124, 0.41451361775398254, 0.16199752688407898, -0.17522190511226654, 0.2171468436717987, -2.5754177570343018, -0.09452058374881744, -1.6540249586105347, -0.7705362439155579, -0.03492758423089981, -0.2652542293071747, 0.044895902276039124, 0.3964420557022095, -0.40189823508262634, -1.0244829654693604, -0.7180095911026001, 1.4870411157608032, -0.07942715287208557, -0.15951769053936005, -1.0193690061569214, -2.5220484733581543, 0.04660886898636818, 0.8388492465019226, 1.0369007587432861, -1.2317235469818115, -0.41559332609176636, -0.9562405943870544, -0.6224948167800903, -0.6581323146820068, 0.2363770604133606, -0.2054009884595871, 2.034836530685425, 0.08647070080041885, -1.3155070543289185, -1.5467650890350342, 1.402755856513977, -0.016710011288523674, 0.732329785823822, 0.09752081334590912, 1.2334948778152466, -1.622504711151123], output: [-3.5246070064604282, -6.892655847594142, 14.091260128579158, 2.401029930407454, -2.680974992789798, 3.8916277984556755, 0.5252526044217629, 7.272547842564242, 4.888426456046666, 0.1230857616321388, 0.32227787389030693, 8.837821222991312, 6.797800467459476, -5.336196554450358, -0.3831869573992117, 3.2695703626864097, -5.256811993196607, -0.1872110292315483, -5.001142715544147, 2.250221073549945, 8.811058109636217, 5.232778335556318, -0.6512846517539874, -2.242062163272187, 9.451628634097027, 8.879343426811909, 2.0671711072014323, 9.227474643353165, -2.163253380148122, 6.008848575681659, 0.7074175020118927, 9.617801820579668, 2.4899496845901012, 2.3832117300480604, -2.562616988438002, -5.25214449133264, -0.2925401848541538, -7.40888946314626, 6.728447606715459, 8.585534132159177, -2.8502671899324317, 2.9276655593856034, 6.279407417654955, -0.03356822185923569, 0.5101090153146224, 9.679261772566992, -9.235685068269035, -3.9845762833139267, 3.263928959146142, -10.372698850929737, -7.636904664040565, -9.25411350837314, -5.141376625258181, 7.840129391031544, 1.7582723882580569, 2.5429684456901556, -1.7377452788371315, 0.002180726241851616, 2.8379890490594435, -2.0202939805697477, 0.396858681471695, 0.8400442760030362, 4.233191419326372, -5.119549879368318] },
+  fft_49_complex: { complex: true, input: [-0.653186559677124, -0.9652466773986816, 1.1103789806365967, -0.8953530192375183, 0.17809812724590302, 0.7561290264129639, 1.412551999092102, 0.4937201738357544, 0.6659147143363953, -0.35966482758522034, 0.7821815013885498, 0.9256682991981506, -2.061739206314087, 1.1828033924102783, 0.8627902269363403, -1.1108970642089844, -0.38785845041275024, 0.6139956712722778, -0.1652761548757553, 1.3882287740707397, 0.014210155233740807, -0.25180017948150635, 0.04756239056587219, -0.7563911080360413, 0.8808799386024475, -0.9352231621742249, 0.09307317435741425, 0.9392328858375549, 1.9853578805923462, 0.5460475087165833, 0.2075347602367401, 0.9603745937347412, 0.04923528432846069, -0.9488556385040283, 1.8051396608352661, -0.4981226325035095, -0.5307197570800781, -0.4026987552642822, -1.1836994886398315, 0.5920459032058716, 2.3241052627563477, 0.00905652716755867, -0.3693919777870178, -0.8863218426704407, -0.03561544418334961, 0.04909980669617653, 1.7207887172698975, -0.1781470775604248, -0.02191477082669735, 0.2800677418708801, -1.0829179286956787, -0.9707691669464111, -1.5767006874084473, 1.3887768983840942, -0.1409223973751068, -0.6077256202697754, 1.3477866649627686, -0.22426994144916534, -0.15349310636520386, 1.9329805374145508, -1.1728582382202148, -0.8913836479187012, -0.7144800424575806, 0.018777834251523018, -0.22412529587745667, 1.3015447854995728, 0.35400378704071045, 1.3718682527542114, -0.11531352996826172, 0.5478003025054932, -0.9265352487564087, -1.0495789051055908, 0.16852520406246185, -2.1708903312683105, -0.6477338075637817, 1.3192451000213623, -0.5638371706008911, -0.902698278427124, 0.1049879938364029, -1.401151418685913, -0.08417209982872009, 2.0441510677337646, -0.5061149597167969, -2.023317337036133, -1.237672209739685, -0.008067949675023556, -1.2302114963531494, -1.6217567920684814, -0.24184295535087585, 0.4429032802581787, -0.04756205901503563, 0.04733582213521004, 0.34705546498298645, 0.9135802388191223, -0.790744960308075, -0.045627497136592865, 0.08297869563102722, 0.028814325109124184], output: [-0.32149941846728325, -0.011710121296346188, 1.5719012837700608, -11.9435719170496, 7.889099849425332, 2.349164232580148, 3.101234046136667, -0.9897723162237673, 2.906207608358317, -7.272673248354674, -0.28567752458921314, -7.807930436881056, -9.98287287277238, 2.170225845960604, -3.9995455705964806, -6.139724045667118, -2.6586132853906514, -7.769710705243423, 0.9131703097247483, -9.940238191191922, -8.286631830021289, 7.3493173799497, 8.661878987183604, -3.553867174039695, 11.583726594048862, 6.9528434851612655, -9.85410942809862, -6.509237172308493, 3.264008317204529, -8.95247219585599, -1.0683784894257498, -8.209758113014594, -8.459077459978726, 3.5851739864239787, -3.130647641813152, 14.882850815864773, -12.112293562127494, -12.761696026041244, 8.867540664810484, 5.336684447842124, 6.405865160930168, 4.524565520649802, 3.1540883076724198, 0.8664358168359105, -14.034517102599953, -1.5599303443084476, -9.534876375145306, -4.0920648890208, -7.072712272425977, -4.325977005204747, -5.285935735332723, -6.9699101614527, -3.845638728726241, 0.6627204864850933, 11.368533948151075, 9.810447058144453, -2.880030910522577, 3.760503532656639, 0.5451509287272396, 1.6597167211016912, -8.58285942430189, -11.129246819405175, 2.030691093396224, 1.6669154167624718, 8.748833248959015, 6.192014902704366, -7.3302180587750225, -0.08243838526629593, 5.522485457006632, 10.260949490755591, 3.972158254442996, -14.10516382807534, 3.8678504886320235, -0.06906298330718297, 3.321736134975782, -4.198418580926676, -6.9491746204757945, 3.9162914328571237, 5.508243105428264, -2.4542837759115645, 0.26676806448008605, 2.9368877862382843, -16.784439087864854, -7.618115164366894, 3.8748624559849345, -1.3062132611368762, 0.5913456904524359, -2.1117316472857373, -0.6923707074676628, 1.8519748031549472, 4.596152734943953, -1.3913510533714808, 9.06246632833908, 5.377389540572535, -6.9033275845955755, 0.20446700081688807, -3.5466927958494017, 9.661642666154053] },
+  fft_64_complex: { complex: true, input: [-0.5566142201423645, -0.31118565797805786, -0.16021257638931274, 1.2795861959457397, 0.789443850517273, 1.4509670734405518, -2.3435871601104736, -0.908771276473999, -0.8936960697174072, -0.8750560879707336, -0.63246089220047, -0.6453881859779358, 1.4873902797698975, -2.564749240875244, -0.7355272769927979, 0.2437494695186615, 0.01852554641664028, 0.5734773278236389, 0.29105886816978455, 1.1027342081069946, 1.8088326454162598, -0.7475714683532715, -1.217250108718872, 2.0953369140625, -1.4750502109527588, -1.343973159790039, -0.5276563763618469, -1.297399640083313, 2.03525447845459, 1.159611701965332, 0.4579901695251465, -0.9503767490386963, 0.05310674011707306, 0.0014978965045884252, 0.18816789984703064, 0.1826970875263214, -0.8126240968704224, 0.18846499919891357, -1.0810470581054688, 0.39759552478790283, 0.6841163635253906, -0.6645811200141907, 1.4170793294906616, -0.13333140313625336, 0.4437287151813507, -2.8415510654449463, -0.07697270810604095, 0.41809648275375366, -1.0915343761444092, -2.776437282562256, -0.43745681643486023, 0.26580730080604553, 1.6740102767944336, 0.22755879163742065, -2.9241530895233154, 1.4011553525924683, 1.6825302839279175, 1.5555111169815063, -0.14711306989192963, 0.8428824543952942, -0.36785921454429626, 0.9028973579406738, 0.1373748779296875, 0.32123002409935, -0.22010713815689087, -0.599694013595581, -2.5014238357543945, -1.0242420434951782, -0.6038212180137634, -0.6158038377761841, 0.7263562083244324, 1.6927067041397095, 1.2062644958496094, 0.26875412464141846, -1.0378527641296387, -0.3995000422000885, -0.24585849046707153, -0.30057334899902344, -1.389653205871582, 0.22163736820220947, -0.07863101363182068, -0.22048908472061157, 0.9795127511024475, 0.6902763247489929, -0.20713239908218384, -1.416999340057373, -0.4947267770767212, -0.8151863813400269, -1.6725231409072876, -1.9904801845550537, 0.21500161290168762, 0.44239726662635803, -0.6723101735115051, 0.6706801056861877, -0.21685439348220825, 0.7185264825820923, -0.9003168344497681, -0.29727739095687866, -1.247776985168457, 1.110876441001892, -0.17718446254730225, -0.8177000284194946, -0.6590757966041565, -0.5754866600036621, 1.1977955102920532, 1.1977572441101074, -1.5937381982803345, -2.6281495094299316, -0.7925133109092712, 1.5867403745651245, -0.6221466660499573, 1.4161043167114258, -1.6597610712051392, 0.7671688199043274, 0.4350447356700897, 0.16583804786205292, -2.573836088180542, 0.8150725960731506, -0.6248568296432495, -1.627732753753662, 0.3362247049808502, 0.30031880736351013, 1.0735864639282227, 1.333678960800171, -1.4852718114852905, 0.8292962312698364, -0.2946455478668213, -0.4848930835723877], output: [-18.114436665549874, -1.03589254419785, -7.089877931654041, -8.136593953551714, -7.48613375743217, 3.4058307917826487, -0.11349749558287137, -7.230611270566168, -8.574244204990293, 5.7986400915728815, 5.473250096298479, 7.160363687658917, -5.13533852659519, 1.8255020562058237, 7.273548900259892, 24.690830952503084, 1.6868156269139245, 8.810813815606256, 6.6254795081932745, -4.567693073247041, -2.239049755279698, -17.516871542107324, 1.3976937653317711, -4.659023464944261, -3.498063522435509, -1.042294081352104, 10.872729327063226, -18.927297144289987, 4.267325206467068, -6.680543926630708, 6.4003803411255165, -14.418237604405501, -5.944847682490945, -10.613565055537038, -1.0402464930724309, 4.733410202647864, 10.284586844608768, 7.495884983823905, 10.978098280081051, -5.653369285851954, 17.957223243942725, 3.3651604181690082, -12.12841177766924, -2.173315060592438, -11.807254527678538, 15.743279066172082, -9.485910951122346, -5.369406024705013, -4.183827428022064, -1.3953354113002816, 5.236741461404014, -0.8655702406244625, -3.707989963955507, -7.550554154083424, -2.1556733662040837, 8.35703936882244, -1.893019665574852, 14.917386376122955, -0.5382039039277775, 2.069340044414755, 10.860091518498637, 6.278499449945484, 2.0427421023295516, 17.14233054094595, 11.975593766197562, -10.740802941727452, -12.646531404584593, 0.13425425752591735, -3.6872251499900193, -1.9392344875845375, -18.16736557788115, -20.48335434059285, 0.6875482818287537, 1.3708837169699595, 26.677807954665866, -1.8603500541565219, -6.790446289324352, -0.7636112342571075, 0.16202327721217014, 2.890628934614975, -13.405530896032472, -10.423450139391665, 3.2690754871155625, -0.13370157023242735, 17.421851422986148, -7.428197970888403, -2.696655432079798, -3.6753472703589476, -1.8393129488369242, -3.9804913300385483, -14.84575265792169, -2.1073012152618524, 1.9496145693049904, -7.632750924513978, -12.292581873003275, -3.378658179390201, -1.3949911389499903, 4.731505962205119, -4.453599350900657, 8.2927961867939, 2.890294756083995, 8.32112212455824, -7.313795240640941, 13.541967260990265, 0.3013408500166399, -22.02939466419966, -0.8879106014223499, -0.9280433153423213, -13.511110887018695, -0.9068611379695746, 13.278501353056239, 4.253543634046318, -6.101434519639574, -2.2367887703037312, -8.42198169225635, 15.996815858908311, 5.985629137333648, -11.238301003546834, 0.09940024194354224, 1.4371628357505024, 6.358283661511687, 5.197077900987281, 3.293723266284598, -9.325758113839981, 1.8329751785259507, 8.745205075152134, 0.42857376402367997, 12.425414796091186] },
+  fft_81_complex: { complex: true, input: [0.16182588040828705, -2.0999834537506104, -0.5849781632423401, 0.24368105828762054, -0.6967284083366394, -0.034912507981061935, -1.1470516920089722, 0.7932613492012024, 0.7196304798126221, 1.1110419034957886, -0.939569354057312, 2.4974799156188965, -1.2485228776931763, -2.838197946548462, 0.9772579669952393, -1.0605124235153198, -0.9749694466590881, 1.0589070320129395, -0.9194549322128296, 0.9585606455802917, 0.15526491403579712, 0.8247618079185486, -1.3407961130142212, -2.523029088973999, 1.0570300817489624, 1.5672498941421509, -0.07786913961172104, -0.20268507301807404, 1.7038300037384033, 2.0308806896209717, -0.27392077445983887, -0.06376122683286667, -0.310247540473938, 0.5324293375015259, -1.7548904418945312, 0.9650161266326904, 0.41914716362953186, -0.35616472363471985, 1.2825931310653687, -2.2441608905792236, -0.5296666622161865, 0.5721330642700195, 1.0294588804244995, -0.04206320270895958, 0.15277068316936493, 0.8333802819252014, -0.858043372631073, -0.12947990000247955, 0.46606531739234924, 0.21820232272148132, 0.2710532248020172, 0.36254584789276123, -0.7573392987251282, -0.340090274810791, 1.7085819244384766, -1.3537571430206299, 0.25935328006744385, -1.2952094078063965, 0.2727315127849579, 0.7723783254623413, 0.03050963208079338, -0.06484062969684601, 0.6543181538581848, -1.2400768995285034, 0.7176415920257568, 1.6827946901321411, -0.272500604391098, 1.1401575803756714, 1.164631724357605, -1.5258969068527222, -0.1886187493801117, 0.8159262537956238, 0.6548362374305725, -1.195924997329712, 0.7484856247901917, -1.5505919456481934, 1.5596144199371338, -0.6034004092216492, -1.260884404182434, -0.6083353757858276, -0.6304160356521606, -1.0715056657791138, 0.6321176886558533, -0.11946370452642441, 0.2912154495716095, 0.014026062563061714, -0.831264078617096, -0.9964612126350403, -0.20909088850021362, -0.11727125197649002, -0.705121636390686, -0.5439452528953552, -1.5571593046188354, -1.6995549201965332, 1.7400833368301392, -0.0008625089540146291, -1.17470383644104, -0.997884213924408, -0.3725011944770813, 0.35961270332336426, -0.3637978732585907, -0.7026296854019165, 0.4852464497089386, -0.5324466824531555, -1.6679120063781738, -0.7600529193878174, -0.40924522280693054, -1.3232053518295288, 0.48746198415756226, -0.5340525507926941, -0.06851266324520111, -2.164428949356079, -0.23488061130046844, 0.4067259430885315, 0.1828349083662033, -1.640533208847046, 1.0253461599349976, -0.3487933874130249, -0.39523687958717346, 1.1609245538711548, -1.0767440795898438, 0.29041048884391785, -0.4530940651893616, -0.1469300389289856, -0.5046754479408264, -1.5347036123275757, -0.49093714356422424, 0.2856205403804779, -0.3833635449409485, 0.01342631783336401, -1.825499176979065, -2.1771950721740723, -1.1022430658340454, 1.0045795440673828, 0.35692599415779114, 0.8746539354324341, -0.22437222301959991, 1.1157619953155518, -1.7885184288024902, -0.09074169397354126, 0.9822217226028442, 1.47532320022583, -1.4025393724441528, 0.5067855715751648, 0.8213406801223755, -0.7191897630691528, 0.3869772255420685, -0.3575884699821472, -1.186098337173462, -0.10889001935720444, -1.2052828073501587, -0.4578658938407898, -0.15333758294582367, 1.691064715385437, 1.8314754962921143, -0.3512211740016937, 0.8154265880584717, -0.3477337062358856, -1.0724186897277832, -0.0016170362941920757, 0.8384286761283875, 1.601130723953247], output: [-8.58128398284316, -11.439007951354142, 0.210079195962251, 5.283435256837555, -3.729018054516872, 4.172233149410547, 6.0094247849082345, -4.867406596990031, 1.6896215297204356, 8.254567899165185, -1.5832996916764728, 6.874626142064364, 7.791926933815942, -0.1113988953646401, 6.157976194485636, 7.908498593612423, 2.1554616784825296, 5.087697970296618, -10.929793301868514, -20.248492473955267, -1.7673074913954707, 4.926997195410063, 3.7962336785471167, 9.785487339090924, 7.5509937732589805, -4.618565996292649, 1.2172725036089613, -10.455295568854787, -10.613583259389454, -12.094708219848364, -11.920648753075001, 2.7422204833456965, -14.39499933098191, -7.986984514501212, 1.4636316169286738, 3.6526472221302644, 1.7780382602070137, 0.929515572223278, -4.1263772762395785, 17.702314875380782, 3.8927335234933165, 3.3808527564005315, 20.656948077140108, -10.291866793201438, 10.429803208602253, 3.2896163180066105, 2.885996928025134, 6.240479112191121, 5.126031982460415, -15.569134216773076, -14.789597569471173, 4.411143413971, 11.01877897308899, -18.4554008501283, -13.452431942413536, -12.210080319899106, 2.7697215257400316, -10.572091571648135, -4.523764337963882, 3.9530076132464806, -1.2538076112045768, 5.196509906553922, -4.582902295841774, -2.4578802622089864, -16.85598763977452, 13.658512440269673, -3.7344804477869467, 25.005764333536174, -5.796885916344451, -3.741579209020099, 14.995322293493794, -9.021964181821255, 12.535576498170204, -3.2115394425750283, -5.316067284014389, -15.73079791413737, 5.410951763343561, -2.179490941864971, -17.59181683345424, 0.35349287922924855, -4.114985065194593, -3.6153272922914104, 5.362879738072423, -6.115564376188364, 10.568119159000045, -15.318073540754956, 4.64815841280388, -21.68501783286247, -13.224626602809812, -2.219868377234576, 4.00167448459214, 1.4272276456380588, -11.341596811743006, 4.873948561987781, 4.2861526759974895, -5.96023765608868, 3.9760305188050347, 6.566800933990912, -5.002198883369546, -2.0309709147031416, -1.2884460262093702, -12.32833011496317, 0.05382037687629326, -10.78439574018218, 1.534354885003597, -13.264704159013437, -12.280184562954279, 11.35458553480188, 9.869000609110085, -0.9969714411418238, -5.650404262545905, -11.399256472642067, -6.3488730387883825, -17.807560792326566, 17.456387259491258, -8.437210476076581, 3.5889363471995455, 1.8943033185815557, 4.734889441188521, -5.865838952657386, -1.197257968989023, -16.13996530086823, -6.139028694267207, 3.1292836318406754, 12.18828544229761, 4.610631776769463, 18.678240243897175, 13.33540206528819, 15.33449728617182, 3.2382519818121347, 7.4739674455797, 6.838902240570333, 2.105371692765212, -1.5378787198749482, -2.6243434923047406, 0.11652216064202503, 2.5272654877458214, -4.532248506023914, -1.954636472567441, -3.4146656232065977, -9.008911319126828, -11.353937437276306, -7.625555655430608, -5.250709931084791, 5.340100553320184, -17.615583941013096, -3.279927088522264, -4.17250143308863, 4.4785210214064195, 6.400344186694214, -3.7005380565285066, -1.122575913062859, -0.5038180980239875, 2.5292421600004156, -1.6061528617728151, -14.630926203339426, 4.481494466029414, -5.292535421445056, 6.4147407824254055, -8.858967182263992, -13.101978958787178, 27.791783247323977] },
+  fft_125_complex: {
+    complex: true,
+    input: [0.8124353885650635, -1.2395265102386475, 0.16039574146270752, 1.9819504022598267, -0.3600437343120575, 0.5889578461647034, -0.24872885644435883, 0.7082515954971313, -0.5773665904998779, 0.13543199002742767, 0.8298076391220093, -0.4342207610607147, -0.4896481931209564, 0.8350552916526794, -0.7197877764701843, -1.5380432605743408, -0.06107468158006668, 2.201507568359375, -0.3374556005001068, -1.2920140027999878, -1.5635737180709839, 1.6503757238388062, -0.6105449795722961, 0.4370865225791931, 0.5172725319862366, -0.6922412514686584, -1.2310928106307983, -1.8327370882034302, 0.6385520100593567, -0.11756168305873871, 0.23863999545574188, 0.018904687836766243, 1.5264911651611328, -0.10709311813116074, 0.5826241374015808, -1.765297770500183, -0.6799334287643433, 0.571349561214447, -0.5346289873123169, -0.8332467079162598, -0.20369751751422882, 0.30775347352027893, -0.7439308762550354, -0.5451762676239014, 0.4147465229034424, 0.5432214736938477, 0.0078077311627566814, 0.9431089758872986, -1.0435984134674072, 0.23313158750534058, -1.209730863571167, 0.45955437421798706, -0.3265855610370636, 0.6053746342658997, -0.8725820183753967, -0.5241636633872986, 1.7610948085784912, 0.6633955836296082, -0.9714349508285522, -0.5136868357658386, 1.5349853038787842, 0.3695436716079712, 0.2814086675643921, 2.5949618816375732, 1.2379722595214844, -1.8505828380584717, 0.47188371419906616, 1.3736778497695923, 1.1676279306411743, -0.2972381114959717, -1.2804001569747925, -1.7225773334503174, -1.577651023864746, 0.3269241452217102, 0.4372701346874237, -0.9890488386154175, -1.1033475399017334, 1.9549521207809448, 0.6475730538368225, 0.5292824506759644, 0.4473706781864166, 0.6432071328163147, -0.1467706263065338, 0.20976980030536652, 0.7794570922851562, -0.31381580233573914, -0.6803262233734131, 0.011037338525056839, 0.9622517824172974, -0.8368542790412903, -0.8587225079536438, 0.633898138999939, 1.2174996137619019, 2.1725356578826904, 0.2897004187107086, -1.309606671333313, -2.598196506500244, 0.557666540145874, -1.5418102741241455, -0.8026767373085022, 0.08196965605020523, -0.002491528633981943, 1.587324857711792, 0.720113217830658, -0.7349403500556946, 1.192823052406311, 0.729076087474823, 0.2271231710910797, 0.9269818663597107, -0.006230967119336128, 0.7080751061439514, -2.3676698207855225, -1.2577210664749146, -0.24024532735347748, 1.1770211458206177, -0.5720393061637878, -1.920380711555481, 0.8346250057220459, 0.7047927975654602, 0.19386297464370728, -1.093695044517517, 0.6820136904716492, -0.2829663157463074, -0.11388272047042847, 0.6887729167938232, 0.09922466427087784, 1.250266194343567, 0.6323102712631226, 0.1848202496767044, -1.8433687686920166, 0.4263668954372406, -0.24713163077831268, 0.01820266619324684, 0.8616644144058228, -1.5792089700698853, 0.6367298364639282, -0.3927980363368988, 0.6539236307144165, 0.8779292106628418, 0.5544044971466064, -0.4019509553909302, -0.3177534341812134, 0.4690700173377991, -0.7643328905105591, -1.5016077756881714, 0.5900797843933105, 0.36968597769737244, 0.7913125157356262, -0.9097312688827515, 0.7577337622642517, 0.7302269339561462, 0.7785899639129639, -0.7808380722999573, -1.0440396070480347, 0.5710436701774597, -0.6633660197257996, -0.6982168555259705, 1.5012997388839722, 1.8683104515075684, -0.009162770584225655, 0.053628917783498764, 0.7560212016105652, 2.734218120574951, 0.8975538015365601, 0.9965337514877319, -0.9736179709434509, -1.3732835054397583, -0.22769349813461304, 0.5279285311698914, 0.9973939657211304, -1.5946539640426636, -0.3334585130214691, 0.5114824771881104, -0.8088312745094299, -0.009309413842856884, -0.3517300486564636, -1.9116069078445435, 2.348860740661621, 0.5795866250991821, 0.6854371428489685, -1.3237437009811401, 0.30641838908195496, -0.9147019386291504, -0.4391704797744751, -0.5839483141899109, 2.629678726196289, -0.1468760073184967, 0.2384973019361496, 0.9125059247016907, -0.9323630332946777, -0.12135637551546097, -0.1401946097612381, 0.6303017139434814, -0.8720086812973022, -0.4348902702331543, 0.707770824432373, 1.0431065559387207, -0.3136989176273346, -0.34615302085876465, -0.09326159954071045, 1.0725315809249878, 0.6460273265838623, 0.9376847743988037, -0.45695820450782776, -0.6338280439376831, 1.7222473621368408, -1.7862892150878906, 0.24867220222949982, -0.8356691002845764, -0.5947818756103516, 0.46319833397865295, -0.5724456310272217, -1.3741754293441772, -1.2928011417388916, -0.15125389397144318, 0.35185670852661133, 0.45909401774406433, -0.276844322681427, -0.7534619569778442, 0.17448769509792328, -0.8041709065437317, 0.7715813517570496, -0.17985635995864868, -0.3155222535133362, -0.27937594056129456, -0.6012001037597656, 1.3496509790420532, -0.07789217680692673, 0.911784827709198, 0.6547526121139526, 0.6284425854682922, -1.5268802642822266, -0.11813484132289886, -0.8698882460594177, 0.5227888822555542, -0.2165810912847519, -0.807140588760376, 1.1484020948410034, -1.4427751302719116, 0.6043649911880493, -0.525101363658905, -0.7420395612716675, -0.4712083339691162, -0.04465737193822861, -0.4328814148902893, -1.3837007284164429, -0.06250803917646408, 0.5973103046417236, 0.2701307237148285, -1.2017232179641724],
+    output: [-8.60566947190091, 9.047967813443393, -6.678979661059562, -8.360831755799953, 3.115231761090947, -6.697091460792556, 0.7568358822534593, 11.066622749967639, 2.8466255459388927, 6.27918164849176, 1.982200342331545, 0.3737138114008831, 8.93599812413914, 12.19410650368572, -4.5824951902858615, -4.643371981421932, 11.975504695471635, -19.79773110340414, 16.894522457707588, -11.40437941841762, -0.5369639596199498, -9.687637013191164, -2.1092885577719587, 10.925736950748426, 2.630639119388632, -2.8059934209514976, 8.536957582736084, -6.326392685852441, 19.05482126619987, 4.74321319044126, 1.7193491466752984, 17.828632923932844, 11.48731740639608, 11.824203873765583, 17.4496745991033, 14.886923392164112, -8.857144569139852, -18.45702795068501, 1.7494909136262233, -0.10810961271060027, 11.536135407131509, -12.549170934604959, -5.461911779043582, 9.93390865307337, 7.577784660577098, 11.044552867393765, -0.38872990536666596, -4.554231235610941, -9.19372645559055, 3.9372069026290077, -0.01611898707200954, -14.170353635176678, 2.977419276887061, 1.171732761405826, 9.77702001582745, 4.731009139477455, -6.457985308398847, -15.847828201326442, 9.95926759256128, -11.402544444287324, 17.96375259440102, -6.2530379131604725, 4.938790863887686, -10.135905976382034, 5.383747427255329, 6.230288801920859, 6.384329417961547, 1.3219731997515085, 3.1738925777367895, 11.969669020511425, -7.174837958584151, -14.303685689056584, 0.7454990885394415, -0.8185666838197116, 8.849555077234482, 3.9055874990272663, -2.018206936296142, -2.3395830964594415, -9.60564366177047, -5.769798601114195, 0.7520113635166212, 0.5065019274195421, -10.25514045587141, -9.637019763988052, 18.803482381840155, -2.3670620465003394, -11.199527109220814, 0.007833023989774368, 8.625619052479255, 9.951138032831594, 7.758961547592464, -1.8211300264093309, 6.718724482310631, -3.675823001520721, -6.918876012140167, -11.471658204308536, 2.467033682197071, -8.508193328034618, 14.225276109269815, 16.144776448129633, 15.927933230731913, 10.235538687304809, 5.767881945940595, -28.86811292326169, 9.669928964930417, -4.668293602235824, 14.769201279257288, 11.067653051718727, 12.761530418035363, -11.044953949483144, 4.242003110570776, -23.262991395970026, -10.145352776759976, 9.212885213669484, 6.329756726823897, -2.401697066728733, -8.54352813832675, 0.701551842851496, -15.258656469035635, -19.14394691246519, -7.522186448172975, -1.997676262113934, -11.408489749207227, 24.631586391822943, -5.098355854851496, 1.0410089604391717, 22.688569178648756, 19.097185694651152, 0.16409737757445608, -14.474036852523309, 4.7859526448704734, 19.558395170182035, 10.633635125709178, -3.0615977245039865, -6.883177607599656, -4.696650706016856, 4.286737340380929, 1.1391516077082633, -17.043528019280284, -7.2919743123329965, 18.42867500955987, 9.458517392981532, -7.23747329102956, 3.0801112402296056, -19.46748520769718, -19.760353803723497, -4.916492200532298, -11.492889148389452, 18.97908975026986, -9.61307230309599, -31.333799318924978, -6.6051904344747445, -2.610154763269807, 17.822400589161646, 5.128110541635698, 7.169102569854218, -20.704759452278893, -13.58731153366598, 9.44388682655616, -1.6262238850380326, 2.4785447765146986, 15.61067463907144, 8.206847406028755, -3.2188956590845867, 10.069966894154257, -17.811422869295264, -7.333188700567103, 7.277796245862989, -5.975213239029163, 0.0776494376891641, 4.448473434391025, -2.74774293836415, -19.167307678242555, -22.10574472079241, -0.9176460422085646, -2.698508980697669, -2.9126695148592425, -11.855087227776476, -10.63642091855646, -4.994794817859426, 8.34646291775615, 6.768483375508413, -10.605502170349608, 14.164919594071957, -12.394208891449225, 12.057338941547131, 22.90400935226583, -15.664600594740676, 1.30010074910831, -0.2199980758912563, -0.13084869289575352, 1.5514316991800943, -6.207758174655294, -21.281702562218502, 0.6785363179803934, 1.3313626670817715, -4.335780044121057, 0.5496189332641022, -12.958391630849423, -8.448073173532364, 9.262064248245114, -6.620464792278902, 6.229199284276308, 14.566232762792579, 7.969523876692342, -6.776657815077612, 12.290319767193232, 9.559089975820987, -10.33865192053783, -22.41131767867546, 2.9821554944671256, 4.62861355727904, -1.556181975825081, 15.142425835968055, -7.526775680393509, -16.871371735315304, -16.91792120984387, 10.94566759815768, 7.919149545235784, -2.721091872750706, -11.568727749623791, -6.404971618403414, 0.9048111698914159, -12.163107403321717, -23.30918866848898, 0.29599861864002963, 21.938310258693168, -2.3132157853956175, 0.7524657263569949, -5.613641198847075, -8.631493933375305, 17.933382465737296, 15.905249964584621, -0.12417131736795461, 16.840693139623014, 10.447828447732046, -15.117925469220577, -1.7000257658739502, -14.07936041053577, -12.445181510422046, 0.09178864954511656, 8.976039835817634, -3.0952547078859665, 4.42715006938438, 2.016084671443174, 1.299572020270631, -5.764752453011912, -5.767130951426552, -4.024939854949247, -6.303606982466311],
+  },
+  fft_128_complex: {
+    complex: true,
+    input: [-0.46925589442253113, -0.17965197563171387, 1.0996962785720825, -0.38428056240081787, 0.48438164591789246, 0.4687068462371826, 0.39249294996261597, -0.3080357015132904, 0.5177378058433533, 0.6460461020469666, 2.131727933883667, -1.9881218671798706, 1.7797539234161377, -1.7341786623001099, 1.8023210763931274, 0.04056849330663681, 0.5402734875679016, -1.2557553052902222, 1.6492481231689453, 1.1959751844406128, -1.1998282670974731, -0.28071489930152893, -1.1383824348449707, -1.105458378791809, -1.3544220924377441, -0.9994213581085205, 1.0511277914047241, 1.3731906414031982, -0.366388738155365, 0.5248828530311584, -0.06661345064640045, 1.1866965293884277, -1.7126017808914185, -0.8613355755805969, -0.15631477534770966, -0.25460687279701233, -1.6851294040679932, -0.479960560798645, -1.6602188348770142, -1.8969695568084717, -1.3355588912963867, 0.2036823183298111, -0.21647731959819794, 0.027381736785173416, 1.3588712215423584, -1.916928768157959, -0.774971067905426, -0.9908143877983093, -0.5971747040748596, 0.1783454567193985, -1.5134319067001343, 0.49953287839889526, 0.6159718632698059, -0.36356979608535767, -1.0240488052368164, -0.04502468928694725, 0.4399787485599518, 1.6701726913452148, -0.29381847381591797, -0.10132483392953873, 1.382400393486023, 1.0435426235198975, 0.9063398838043213, -0.8696965575218201, 1.7439777851104736, -1.1632086038589478, -0.8023254871368408, 0.18316711485385895, 0.7868092656135559, 0.6457328796386719, 0.000458094640634954, 0.2006639540195465, 0.44469723105430603, 1.5764163732528687, 1.37369704246521, 1.4348359107971191, -0.3319808542728424, 0.4867582619190216, 1.6903849840164185, 0.09980353713035583, -0.8114499449729919, 0.21222348511219025, 1.7070233821868896, -0.06860260665416718, 0.3595496714115143, -0.22636087238788605, -0.11660676449537277, 0.35164034366607666, 0.6463969349861145, 0.7742586731910706, 2.551539897918701, -0.36875852942466736, -0.907656192779541, -1.5238984823226929, -0.17778240144252777, -1.5085883140563965, -2.1957619190216064, 0.2777234613895416, -1.9981634616851807, -0.36680006980895996, 0.43345877528190613, 0.12283071875572205, -0.7519865036010742, -0.8946448564529419, -1.0104613304138184, -0.574897289276123, -0.7528412342071533, -0.05524379014968872, -0.2588641345500946, 0.13064302504062653, -1.2371978759765625, 0.4406910538673401, -0.06988917291164398, 2.41745924949646, 0.2212488353252411, -0.9714004993438721, 0.41281431913375854, -0.5191168785095215, 0.5448367595672607, -0.5188544392585754, -0.35594210028648376, 1.31781005859375, 0.19899560511112213, 0.10419808328151703, -0.7013852000236511, 1.2463740110397339, -0.6775157451629639, -0.07904287427663803, -0.46743783354759216, 0.6186703443527222, 1.0175814628601074, 1.2864513397216797, -0.9423191547393799, -0.815204381942749, -1.4644925594329834, 0.4917009174823761, 1.0119210481643677, -1.106779932975769, -0.537906289100647, 0.09276387840509415, 2.3790464401245117, -0.18112841248512268, -0.7766230702400208, -0.4697842597961426, -0.24867388606071472, 0.350057452917099, -1.7352688312530518, 0.3294185996055603, -0.049025554209947586, 0.19478608667850494, 0.3822934031486511, 0.8966708183288574, 0.7362163662910461, 1.9381089210510254, 0.7763320207595825, -0.9195051193237305, -0.15279459953308105, 0.7260957956314087, 1.248564600944519, 0.8662853837013245, 0.22399753332138062, -0.5145770311355591, 0.6150395274162292, 0.17572005093097687, 0.1295638382434845, -0.823893129825592, 0.09608310461044312, 0.1056567132472992, -0.1421104371547699, -0.13746590912342072, 0.6133358478546143, -1.2861822843551636, 0.8356537818908691, 0.5849096775054932, 0.7742197513580322, 1.864620327949524, 0.1762249767780304, -1.6820768117904663, 0.170370414853096, -0.9834036827087402, -0.3257652521133423, 1.3807744979858398, 0.5471190810203552, -1.2435959577560425, -1.5459822416305542, -1.613089919090271, -1.5104633569717407, -0.7388973832130432, -0.3404698073863983, 2.2242629528045654, 1.101983904838562, -0.41542941331863403, 0.825286328792572, -1.7480731010437012, -0.227715402841568, -0.2249363213777542, -0.43012070655822754, -0.7993203401565552, -0.06888776272535324, 0.6460397839546204, 0.8002319931983948, 0.17660461366176605, 1.4292235374450684, -1.6437238454818726, 0.43454229831695557, -0.011055096983909607, -1.4420714378356934, -1.447640299797058, 0.16068753600120544, -0.2804417014122009, 0.3235762119293213, 1.2617326974868774, 0.1411382406949997, -0.23557768762111664, 0.13576534390449524, -1.6873000860214233, -0.42229339480400085, 0.8121863007545471, -1.0847389698028564, -1.6272295713424683, 1.8023097515106201, 0.7626906633377075, -1.3667072057724, 0.14141327142715454, 1.5778913497924805, -0.9910776615142822, -0.013187640346586704, 0.8963002562522888, 0.3190685510635376, 1.4895867109298706, 0.8450273871421814, -0.7519250512123108, 1.1641961336135864, -0.1140206977725029, -0.7162372469902039, -0.2397381216287613, 0.3938809037208557, 1.8317588567733765, -0.32195407152175903, 0.9710619449615479, -1.7239831686019897, -2.138955593109131, -1.1850391626358032, -1.1381371021270752, -0.2674075663089752, -0.7534425258636475, 1.180410385131836, -1.2349971532821655, -0.9732410311698914, -0.9838739633560181, 0.1530347764492035, 0.7869290113449097, -0.5131613612174988, 0.7393561601638794, 0.5818021893501282, -0.9010815024375916],
+    output: [4.6533055662875995, -11.01426088809967, 8.89589916229627, -8.67151790820412, -5.338702909401541, 1.2596635311977273, 3.5364356687085934, 7.495255571019467, 14.607353334543243, -18.087691812322902, 7.564636217194101, -31.42023502604239, -6.214070442777103, -10.959840511165702, -0.23122589156934037, -11.620785761919407, -8.17956409451724, -2.5877133314432683, 11.015937578315528, 5.780471206474331, -6.0841540885916, 20.060728028967564, 3.2708359605096224, 11.782757950154132, 0.09008114957166846, 3.524696982412627, -10.677391613915269, 0.31298678204602526, -21.35192563693417, 12.385290905482485, 2.161430892216815, 1.8976227049433234, -8.989539272795344, 13.126889442846508, 2.8499370378020865, 1.7474738916889598, 5.022029084645628, -3.2038788652839223, 5.800603925256104, -11.310381554572714, 8.70516962035894, -19.131074662654022, 1.7802965964722235, -14.806060115152377, 14.302944499531886, -15.445567609224282, -3.8462368249304397, -9.516418012771346, 13.019042335168749, -10.443622659963964, -10.883324294802648, 3.957267045122531, -0.667106901008031, -4.392642175023952, -5.891115650099352, 16.457002297798386, 8.618862097531547, 8.454290723978408, -1.5705784838263321, -2.0950451541937802, -0.8023593073701956, -3.1480197619642944, -5.620797591431048, -4.595460315016652, -5.651367928832769, -12.289105277159251, 12.268533105640746, -14.70471337060609, -2.55864085006375, 4.848469317941595, -15.477103640433441, -0.8200806702142849, 1.477227192472725, -8.856561753264552, -1.7000675285625562, 17.974609221649885, -1.8152649987658895, 2.226238832898174, 5.942983183373236, 5.062290291727308, 7.091530929787825, -2.0854528335431413, -2.2607116079192284, -0.9522421064376894, 1.9922801075296586, -4.848590729633989, 22.20475854724638, 14.139180422304625, 7.742631959368987, 8.736735176873482, -14.221452296099534, 10.498675928711396, -0.16082936982259532, -3.8946140234989644, -20.400591916908017, -26.146811938453247, 6.587325710150547, -20.584777745167045, 14.638519536239858, -1.6926103003462887, -10.208680237895118, 6.147709389485655, -1.986836416339922, 15.002313879999715, 6.5052637626549386, 2.3893876610117486, -1.4903416359002417, 2.5125845767117747, -11.629045740948362, -1.0314802432568921, -7.7204438190047515, -2.026871521184386, 5.038120875238651, 12.509857810233317, -2.8111210090417167, 6.6237223115217265, -11.560596890267984, 21.096203768560102, 2.492616415367813, 15.694305674116976, 6.98125969245643, -2.51588120579323, -23.614269801976544, -19.721211546858797, 5.821751650555415, -7.957710273512344, -2.3083909989954927, -9.024817057699233, -2.3525185122853145, 12.543071284890175, -6.633775272995389, 1.9856801362534808, -18.01947792290391, 1.6526855337166202, -25.06385984262638, 15.109040183134795, -3.952374815603415, 8.226644901530008, 18.761705127047698, 3.93250694875225, -7.442640246025839, -19.86894607874479, 12.889936646377077, 17.058248465954463, -10.38590977421429, -8.762421865432158, 21.57778103004022, 3.943591516088906, -5.804714467684743, 4.255345272797616, 6.842976031631512, 1.5538004698130026, -4.425512141506818, -4.377682130235696, -0.5744161527454539, -6.466874369118861, 2.117583592008284, 9.441789454733126, 8.94127387455451, -0.04257498982078545, -6.206626408089971, -17.798516107544632, 3.1902492140570136, -8.701598789976085, -3.6350162587702526, 11.47775543413513, 5.763743605757109, -15.507961256533623, -12.20946994071868, 15.597966311245933, -7.577596335933641, -5.783678667731976, 0.5854316506349582, 1.0524576740591431, 9.32444015560922, 14.86759124801031, -2.8564425698215246, -4.050241339325346, -14.743103022730313, 0.6982253561530176, 1.4485867743987042, 15.863950914048452, -10.345289369065991, 18.289197082863648, -15.758886684054374, -5.422633603143401, -0.983270019651755, -12.824811394399234, -2.2739348529046874, 10.113426086776306, -29.057768796230015, -23.164653851413163, -14.355517383664846, -1.9394528310513124, 13.636590068666269, 3.459140997767836, -1.3650156273028862, 4.305744984992871, 17.561184167420265, -10.355528699590693, 21.10112660094154, -8.222224503241419, 2.535762677259953, 7.713352852965743, -14.28383557691274, 25.855974989407443, 4.209799001514582, -21.52088963141282, 10.888001415630182, -13.13372692624647, 11.475712218807361, 1.8430819379630443, -4.696550665547157, -6.590021563791754, -25.66889721277428, 3.6636259509044233, 16.177010478373504, -18.738751282648302, -8.929151482851156, 1.9637629189516739, 9.362438823950344, 18.876072069444945, 15.70735011826503, -7.857995088347812, 1.9318237720996665, -16.12923916175005, -7.934748190521045, 0.06924911452461413, -14.499587783289256, -0.4727635184847867, 10.482895882033887, -9.583766230162844, -4.311441092707766, 10.174307532202345, -12.09622773401161, -11.448182124496999, -8.63443998241864, 19.56612298031517, 12.124200790573179, 13.998676498504775, 2.185864445998216, 9.654230140395715, -13.738018677296829, 23.468720582013574, 9.279088660718887, -3.9238192921918174, -13.262040984478173, 13.199249429343212, 8.476422030030909, 22.120527194597067, 30.905491552855995, -14.140763293470911, -3.076195026308723, 2.461867832774407, -1.150707769438454, -18.323349178808648],
+  },
+};
+
+describe("Mathematical operations", () => {
+  describe("bankers rounding", () => {
+    it("should round up to nearest even", () => {
+      expect(bankers_round(-0.5)).toBeCloseTo(0);
+      expect(bankers_round(1.5)).toBeCloseTo(2);
+      expect(bankers_round(19.5)).toBeCloseTo(20);
+    });
+    it("should round down to nearest even", () => {
+      expect(bankers_round(-1.5)).toBeCloseTo(-2);
+      expect(bankers_round(2.5)).toBeCloseTo(2);
+      expect(bankers_round(18.5)).toBeCloseTo(18);
+    });
+  });
+
+  describe("median filtering", () => {
+    it("should compute median filter", async () => {
+      const t1 = new Float32Array([5, 12, 2, 6, 3, 10, 9, 1, 4, 8, 11, 7]);
+      const window = 3;
+
+      const target = new Float32Array([12, 5, 6, 3, 6, 9, 9, 4, 4, 8, 8, 11]);
+
+      const output = medianFilter(t1, window);
+      compare(output, target, 1e-3);
+    });
+
+    // TODO add tests for errors
+  });
+
+  describe("log softmax", () => {
+    // Should match output of scipy log_softmax
+    it("should compute log softmax correctly for usual values", () => {
+      const input = [0, 1, 2, 3];
+      const expected = [-3.4401896985611953, -2.4401896985611953, -1.4401896985611953, -0.44018969856119533];
+      const output = log_softmax(input);
+      compare(output, expected, 1e-13);
+    });
+
+    it("should compute log softmax correctly for values with large differences", () => {
+      const input = [1000, 1];
+      const expected = [0, -999];
+      const output = log_softmax(input);
+      compare(output, expected, 1e-13);
+    });
+  });
+
+  describe("FFT", () => {
+    // Should match output of numpy fft
+    it("should compute real FFT for power of two", () => {
+      {
+        // size = 4
+        // np.fft.fft([1,2,3,4]) == array([10.+0.j, -2.+2.j, -2.+0.j, -2.-2.j])
+        const input = new Float32Array([1, 2, 3, 4]);
+        const target = new Float32Array([10, 0, -2, 2, -2, 0, -2, -2]);
+
+        const output = fft(input);
+        compare(output, target, 1e-3);
+      }
+
+      {
+        // size = 16
+        // np.fft.fft([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
+        // == array([136. +0.j        ,  -8.+40.21871594j,  -8.+19.3137085j ,
+        //            -8.+11.9728461j ,  -8. +8.j        ,  -8. +5.3454291j ,
+        //            -8. +3.3137085j ,  -8. +1.59129894j,  -8. +0.j        ,
+        //            -8. -1.59129894j,  -8. -3.3137085j ,  -8. -5.3454291j ,
+        //            -8. -8.j        ,  -8.-11.9728461j ,  -8.-19.3137085j ,
+        //            -8.-40.21871594j])
+        const input = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
+        const target = new Float32Array([136.0, 0.0, -8.0, 40.218715937006785, -8.0, 19.31370849898476, -8.0, 11.972846101323912, -8.0, 8.0, -8.0, 5.345429103354389, -8.0, 3.313708498984761, -8.0, 1.5912989390372658, -8.0, 0.0, -8.0, -1.5912989390372658, -8.0, -3.313708498984761, -8.0, -5.345429103354389, -8.0, -8.0, -8.0, -11.972846101323912, -8.0, -19.31370849898476, -8.0, -40.218715937006785]);
+
+        const output = fft(input);
+        compare(output, target, 1e-3);
+      }
+    });
+
+    it("should compute real FFT for non-power of two", () => {
+      {
+        // size = 3
+        // np.fft.fft([1,2,3]) == array([ 6. +0.j, -1.5+0.8660254j, -1.5-0.8660254j])
+        const input = new Float32Array([1, 2, 3]);
+        const target = new Float32Array([6, 0, -1.5, 0.8660254, -1.5, -0.8660254]);
+
+        const output = fft(input);
+        compare(output, target, 1e-3);
+      }
+    });
+
+    it("should compute complex FFT for non-power of two", () => {
+      {
+        // size = 3
+        // np.fft.fft([1+3j,2-2j,3+1j]) == array([ 6. +2.j, -4.09807621+4.3660254j, 1.09807621+2.6339746j])
+        const input = new Float32Array([1, 3, 2, -2, 3, 1]);
+        const target = new Float32Array([6, 2, -4.09807621, 4.3660254, 1.09807621, 2.6339746]);
+
+        const output = fft(input, true);
+        compare(output, target, 1e-3);
+      }
+    });
+
+    it("should compute complex FFT for power of two", () => {
+      {
+        // size = 4
+        // np.fft.fft([1+4j, 2-3j,3+2j, 4-1j]) == array([10. +2.j, -4. +4.j, -2.+10.j,  0. +0.j])
+        const input = new Float32Array([1, 4, 2, -3, 3, 2, 4, -1]);
+        const target = new Float32Array([10, 2, -4, 4, -2, 10, 0, 0]);
+
+        const output = fft(input, true);
+        compare(output, target, 1e-3);
+      }
+    });
+  });
+
+  describe("FFT (random & dynamic)", () => {
+    // Should match output of numpy fft
+    for (const [name, test] of Object.entries(FFT_TEST_DATA)) {
+      it(name, () => {
+        const output = fft(test.input, test.complex);
+
+        if (output.map((v, i) => Math.abs(v - test.output[i])).some((v) => v > 1e-4)) {
+          console.log("input", test.input);
+          console.log("output", output);
+          console.log("target", test.output);
+        }
+        compare(output, test.output, 1e-4);
+      });
+    }
+  });
+});
diff --git a/tests/utils/tensor.test.js b/tests/utils/tensor.test.js
new file mode 100644
index 000000000..0d36954e3
--- /dev/null
+++ b/tests/utils/tensor.test.js
@@ -0,0 +1,176 @@
+import { Tensor, cat, mean, stack, layer_norm } from "../../src/transformers.js";
+import { compare } from "../test_utils.js";
+
+describe("Tensor operations", () => {
+  describe("cat", () => {
+    it("should concatenate on dim=0", async () => {
+      const t1 = new Tensor("float32", [1, 2, 3], [1, 3]);
+      const t2 = new Tensor("float32", [4, 5, 6, 7, 8, 9], [2, 3]);
+      const t3 = new Tensor("float32", [10, 11, 12], [1, 3]);
+
+      const target1 = new Tensor("float32", [1, 2, 3, 4, 5, 6, 7, 8, 9], [3, 3]);
+      const target2 = new Tensor("float32", [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], [4, 3]);
+
+      // 2 tensors
+      const concatenated1 = cat([t1, t2], 0);
+      compare(concatenated1, target1, 1e-3);
+
+      // 3 tensors
+      const concatenated2 = cat([t1, t2, t3], 0);
+      compare(concatenated2, target2, 1e-3);
+    });
+
+    it("should concatenate on dim=1", async () => {
+      const t1 = new Tensor("float32", [1, 2, 3, -1, -2, -3], [2, 3, 1]);
+      const t2 = new Tensor("float32", [4, -4], [2, 1, 1]);
+      const t3 = new Tensor("float32", [5, 6, -5, -6], [2, 2, 1]);
+
+      const target1 = new Tensor("float32", [1, 2, 3, 4, -1, -2, -3, -4], [2, 4, 1]);
+      const target2 = new Tensor("float32", [1, 2, 3, 4, 5, 6, -1, -2, -3, -4, -5, -6], [2, 6, 1]);
+
+      // 2 tensors
+      const concatenated1 = cat([t1, t2], 1);
+      compare(concatenated1, target1, 1e-3);
+
+      // 3 tensors
+      const concatenated2 = cat([t1, t2, t3], 1);
+      compare(concatenated2, target2, 1e-3);
+    });
+
+    it("should concatenate on dim=-2", async () => {
+      const t1 = new Tensor("float32", [1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16], [2, 1, 3, 2]);
+      const t2 = new Tensor("float32", [7, 8, 9, 10, 17, 18, 19, 20], [2, 1, 2, 2]);
+
+      const target = new Tensor("float32", [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [2, 1, 5, 2]);
+
+      const concatenated = cat([t1, t2], -2);
+
+      compare(concatenated, target, 1e-3);
+    });
+
+    // TODO add tests for errors
+  });
+
+  describe("stack", () => {
+    const t1 = new Tensor("float32", [0, 1, 2, 3, 4, 5], [1, 3, 2]);
+
+    it("should stack on dim=0", async () => {
+      const target1 = new Tensor("float32", [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5], [2, 1, 3, 2]);
+      const target2 = new Tensor("float32", [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5], [3, 1, 3, 2]);
+
+      // 2 tensors
+      const stacked1 = stack([t1, t1], 0);
+      compare(stacked1, target1, 1e-3);
+
+      // 3 tensors
+      const stacked2 = stack([t1, t1, t1], 0);
+      compare(stacked2, target2, 1e-3);
+    });
+
+    it("should stack on dim=1", async () => {
+      const target1 = new Tensor("float32", [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5], [1, 2, 3, 2]);
+      const target2 = new Tensor("float32", [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5], [1, 3, 3, 2]);
+
+      // 2 tensors
+      const stacked1 = stack([t1, t1], 1);
+      compare(stacked1, target1, 1e-3);
+
+      // 3 tensors
+      const stacked2 = stack([t1, t1, t1], 1);
+      compare(stacked2, target2, 1e-3);
+    });
+
+    it("should stack on dim=-1", async () => {
+      const target1 = new Tensor("float32", [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5], [1, 3, 2, 2]);
+      const target2 = new Tensor("float32", [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5], [1, 3, 2, 3]);
+
+      // 2 tensors
+      const stacked1 = stack([t1, t1], -1);
+      compare(stacked1, target1, 1e-3);
+
+      // 3 tensors
+      const stacked2 = stack([t1, t1, t1], -1);
+      compare(stacked2, target2, 1e-3);
+    });
+  });
+
+  describe("permute", () => {
+    it("should permute", async () => {
+      const x = new Tensor("float32", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [2, 3, 4]);
+      // Permute axes to (0, 1, 2) - No change
+      const permuted_1 = x.permute(0, 1, 2);
+      const target_1 = x;
+      compare(permuted_1, target_1, 1e-3);
+
+      // Permute axes to (0, 2, 1)
+      const permuted_2 = x.permute(0, 2, 1);
+      const target_2 = new Tensor("float32", [0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11, 12, 16, 20, 13, 17, 21, 14, 18, 22, 15, 19, 23], [2, 4, 3]);
+      compare(permuted_2, target_2, 1e-3);
+
+      // Permute axes to (1, 0, 2)
+      const permuted_3 = x.permute(1, 0, 2);
+      const target_3 = new Tensor("float32", [0, 1, 2, 3, 12, 13, 14, 15, 4, 5, 6, 7, 16, 17, 18, 19, 8, 9, 10, 11, 20, 21, 22, 23], [3, 2, 4]);
+      compare(permuted_3, target_3, 1e-3);
+
+      // Permute axes to (1, 2, 0)
+      const permuted_4 = x.permute(1, 2, 0);
+      const target_4 = new Tensor("float32", [0, 12, 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23], [3, 4, 2]);
+      compare(permuted_4, target_4, 1e-3);
+
+      // Permute axes to (2, 0, 1)
+      const permuted_5 = x.permute(2, 0, 1);
+      const target_5 = new Tensor("float32", [0, 4, 8, 12, 16, 20, 1, 5, 9, 13, 17, 21, 2, 6, 10, 14, 18, 22, 3, 7, 11, 15, 19, 23], [4, 2, 3]);
+      compare(permuted_5, target_5, 1e-3);
+
+      // Permute axes to (2, 1, 0)
+      const permuted_6 = x.permute(2, 1, 0);
+      const target_6 = new Tensor("float32", [0, 12, 4, 16, 8, 20, 1, 13, 5, 17, 9, 21, 2, 14, 6, 18, 10, 22, 3, 15, 7, 19, 11, 23], [4, 3, 2]);
+      compare(permuted_6, target_6, 1e-3);
+    });
+  });
+
+  describe("map", () => {
+    it("should double", async () => {
+      const original = new Tensor("float32", [1, 2, 3, 4, 5, 6], [2, 3]);
+      const target = new Tensor("float32", [2, 4, 6, 8, 10, 12], [2, 3]);
+
+      const doubled = original.map((x) => x * 2);
+      compare(doubled, target, 1e-3);
+    });
+  });
+
+  describe("mean", () => {
+    it("should calculate mean", async () => {
+      const t1 = new Tensor("float32", [1, 2, 3, 4, 5, 6], [2, 3, 1]);
+
+      const target = new Tensor("float32", [3.5], []);
+
+      const target0 = new Tensor("float32", [2.5, 3.5, 4.5], [3, 1]);
+      const target1 = new Tensor("float32", [2, 5], [2, 1]);
+      const target2 = new Tensor("float32", [1, 2, 3, 4, 5, 6], [2, 3]);
+
+      let avg = mean(t1);
+      compare(avg, target, 1e-3);
+
+      let avg0 = mean(t1, 0);
+      compare(avg0, target0, 1e-3);
+
+      let avg1 = mean(t1, 1);
+      compare(avg1, target1, 1e-3);
+
+      let avg2 = mean(t1, 2);
+      compare(avg2, target2, 1e-3);
+    });
+  });
+
+  describe("layer_norm", () => {
+    it("should calculate layer norm", async () => {
+      const t1 = new Tensor("float32", [1, 2, 3, 4, 5, 6], [2, 3]);
+
+      const target = new Tensor("float32", [-1.2247356176376343, 0.0, 1.2247356176376343, -1.2247357368469238, -1.1920928955078125e-7, 1.2247354984283447], [2, 3]);
+
+      const norm = layer_norm(t1, [t1.dims.at(-1)]);
+      compare(norm, target, 1e-3);
+    });
+  });
+});
diff --git a/tests/utils/tensor_ops.test.js b/tests/utils/tensor_ops.test.js
new file mode 100644
index 000000000..3227d5f58
--- /dev/null
+++ b/tests/utils/tensor_ops.test.js
@@ -0,0 +1,191 @@
+import { Tensor, interpolate_4d, matmul, rfft } from "../../src/transformers.js";
+import { init } from "../init.js";
+
+// Initialise the testing environment
+init();
+
+function expectToBeCloseToArray(actual, expected) {
+  expect(actual.length).toEqual(expected.length);
+  actual.forEach((x, i) => expect(x).toBeCloseTo(expected[i]));
+}
+
+function range(start, stop = undefined, step = 1) {
+  if (stop === undefined) {
+    stop = start;
+    start = 0;
+  }
+
+  const result = [];
+  for (let i = start; i < stop; i += step) {
+    result.push(i);
+  }
+  return result;
+}
+
+describe("Tensor operations", () => {
+  describe("interpolate", () => {
+    const input = new Tensor(
+      "float32",
+      new Float32Array(2 * 3 * 4 * 5).map((_, i) => i),
+      [2, 3, 4, 5],
+    );
+
+    const size = [2, 3, 3, 2];
+    it("bilinear", async () => {
+      const resized = await interpolate_4d(input, { mode: "bilinear", size });
+      const target = new Float32Array(
+        [
+          [
+            [
+              [1.5833335, 4.0833335],
+              [8.25, 10.75],
+              [14.916668, 17.416668],
+            ],
+            [
+              [21.583332, 24.083334],
+              [28.25, 30.75],
+              [34.916668, 37.416668],
+            ],
+            [
+              [41.583332, 44.083332],
+              [48.25, 50.75],
+              [54.916668, 57.416668],
+            ],
+          ],
+          [
+            [
+              [61.583332, 64.083336],
+              [68.25, 70.75],
+              [74.916664, 77.41667],
+            ],
+            [
+              [81.58333, 84.083336],
+              [88.25, 90.75],
+              [94.91667, 97.41667],
+            ],
+            [
+              [101.583336, 104.08333],
+              [108.25, 110.75],
+              [114.916664, 117.416664],
+            ],
+          ],
+        ].flat(Infinity),
+      );
+
+      expectToBeCloseToArray(target, resized.data);
+    });
+
+    it("bicubic", async () => {
+      const resized = await interpolate_4d(input, { mode: "bicubic", size });
+
+      const target = new Float32Array(
+        [
+          [
+            [
+              [1.2987545, 3.9628172],
+              [8.167969, 10.832031],
+              [15.037184, 17.701244],
+            ],
+            [
+              [21.298756, 23.962818],
+              [28.167969, 30.832031],
+              [35.037186, 37.701252],
+            ],
+            [
+              [41.298756, 43.96282],
+              [48.16797, 50.83203],
+              [55.037193, 57.701256],
+            ],
+          ],
+          [
+            [
+              [61.29875, 63.96282],
+              [68.16797, 70.83203],
+              [75.03719, 77.701256],
+            ],
+            [
+              [81.29875, 83.96282],
+              [88.16797, 90.83203],
+              [95.03721, 97.70126],
+            ],
+            [
+              [101.29875, 103.962814],
+              [108.16797, 110.83203],
+              [115.03721, 117.70127],
+            ],
+          ],
+        ].flat(Infinity),
+      );
+
+      expectToBeCloseToArray(target, resized.data);
+    });
+  });
+
+  describe("matmul", () => {
+    it("(2, 5) @ (5, 4) -> (2, 4)", async () => {
+      const a = new Tensor("float32", range(10), [2, 5]);
+      const b = new Tensor("float32", range(20), [5, 4]);
+
+      const result = await matmul(a, b);
+
+      const target = new Float32Array(
+        [
+          [120.0, 130.0, 140.0, 150.0],
+          [320.0, 355.0, 390.0, 425.0],
+        ].flat(),
+      );
+
+      expectToBeCloseToArray(target, result.data);
+    });
+  });
+
+  describe("rfft", () => {
+    it("non-power of 2", async () => {
+      const rows = 2;
+      const cols = 3;
+      const input = new Tensor("float32", range(rows * cols), [rows, cols]);
+      const dim = new Tensor("int64", [-1n], []);
+      const result = await rfft(input, dim);
+
+      const target = new Float32Array(
+        [
+          [
+            [3, 0],
+            [-1.5, 0.8660262823104858],
+          ],
+          [
+            [12, 0],
+            [-1.5, 0.866027295589447],
+          ],
+        ].flat(Infinity),
+      );
+
+      expectToBeCloseToArray(target, result.data);
+    });
+
+    it("power of 2", async () => {
+      const rows = 2;
+      const cols = 4;
+      const input = new Tensor("float32", range(rows * cols), [rows, cols]);
+      const dim = new Tensor("int64", [-1n], []);
+
+      const result = await rfft(input, dim);
+      const target = new Float32Array(
+        [
+          [
+            [6, 0],
+            [-2, 2],
+            [-2, 0],
+          ],
+          [
+            [22, 0],
+            [-2, 2],
+            [-2, 0],
+          ],
+        ].flat(Infinity),
+      );
+
+      expectToBeCloseToArray(target, result.data);
+    });
+  });
+});
diff --git a/tests/utils/utils.test.js b/tests/utils/utils.test.js
new file mode 100644
index 000000000..8a1891f19
--- /dev/null
+++ b/tests/utils/utils.test.js
@@ -0,0 +1,62 @@
+import { AutoProcessor, hamming, hanning, mel_filter_bank } from "../../src/transformers.js";
+import { getFile } from "../../src/utils/hub.js";
+
+import { MAX_TEST_EXECUTION_TIME } from "../init.js";
+import { compare } from "../test_utils.js";
+
+describe("Utilities", () => {
+  describe("Audio utilities", () => {
+    it(
+      "should calculate MEL filters",
+      async () => {
+        // NOTE: Uses official HF implementation as reference:
+        const processor = await AutoProcessor.from_pretrained("openai/whisper-tiny.en");
+        const config = processor.feature_extractor.config;
+
+        // True MEL filters
+        const original_mel_filters = config.mel_filters;
+
+        // Calculated MEL filters
+        const calculated_mel_filters = mel_filter_bank(
+          Math.floor(1 + config.n_fft / 2), // num_frequency_bins
+          config.feature_size, // num_mel_filters
+          0.0, // min_frequency
+          8000.0, // max_frequency
+          config.sampling_rate, // sampling_rate
+          "slaney", // norm
+          "slaney", // mel_scale
+        );
+
+        const original = original_mel_filters.flat();
+        const calculated = calculated_mel_filters.flat();
+
+        // Compute max difference
+        const maxdiff = original.reduce((maxdiff, _, i) => {
+          const diff = Math.abs(original[i] - calculated[i]);
+          return Math.max(maxdiff, diff);
+        }, -Infinity);
+        expect(maxdiff).toBeGreaterThanOrEqual(0);
+        expect(maxdiff).toBeLessThan(1e-6);
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+
+    it(
+      "should calculate window",
+      async () => {
+        compare(hanning(10), new Float64Array([0.0, 0.11697777844051105, 0.41317591116653485, 0.75, 0.9698463103929542, 0.9698463103929542, 0.75, 0.41317591116653485, 0.11697777844051105, 0.0]));
+        compare(hamming(10), new Float64Array([0.08000000000000002, 0.1876195561652702, 0.46012183827321207, 0.7700000000000001, 0.9722586055615179, 0.9722586055615179, 0.7700000000000001, 0.46012183827321207, 0.1876195561652702, 0.08000000000000002]));
+      },
+      MAX_TEST_EXECUTION_TIME,
+    );
+  });
+
+  describe("Hub utilities", () => {
+    it("Read data from blob", async () => {
+      const blob = new Blob(["Hello, world!"], { type: "text/plain" });
+      const blobUrl = URL.createObjectURL(blob);
+      const data = await getFile(blobUrl);
+      expect(await data.text()).toBe("Hello, world!");
+    });
+  });
+});
diff --git a/webpack.config.js b/webpack.config.js
index c958b45b8..218249249 100644
--- a/webpack.config.js
+++ b/webpack.config.js
@@ -1,54 +1,116 @@
-import CopyWebpackPlugin from 'copy-webpack-plugin';
-import TerserPlugin from 'terser-webpack-plugin';
-import { fileURLToPath } from 'url';
-import path from 'path';
+import TerserPlugin from "terser-webpack-plugin";
+import { fileURLToPath } from "url";
+import path from "path";
 
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
 
 /**
- * @type {import('webpack').Configuration}
+ * Helper function to create webpack configurations.
+ * @param {Object} options Options for creating a webpack target.
+ * @param {string} options.name Name of output file.
+ * @param {string} options.suffix Suffix of output file.
+ * @param {string} options.type Type of library.
+ * @param {string} options.ignoreModules The list of modules to ignore.
+ * @param {string} options.externalModules The list of modules to set as external.
+ * @returns {import('webpack').Configuration} One webpack target.
  */
-export default {
+function buildConfig({
+  name = "",
+  suffix = ".js",
+  type = "module", // 'module' | 'commonjs'
+  ignoreModules = [],
+  externalModules = [],
+} = {}) {
+  const outputModule = type === "module";
+
+  const alias = Object.fromEntries(
+    ignoreModules.map((module) => {
+      return [module, false];
+    }),
+  );
+
+  /** @type {import('webpack').Configuration} */
+  const config = {
     mode: 'development',
     devtool: 'source-map',
     entry: {
-        // include dist in entry point so that when running dev server,
-        // we can access the files with /dist/...
-        'dist/transformers': './src/transformers.js',
-        'dist/transformers.min': './src/transformers.js',
+      [`transformers${name}`]: './src/transformers.js',
+      [`transformers${name}.min`]: './src/transformers.js',
     },
     output: {
-        filename: '[name].js',
-        path: __dirname,
-        library: {
-            type: 'module',
-        },
+      filename: `[name]${suffix}`,
+      path: path.join(__dirname, 'dist'),
+      library: {
+        type,
+      },
+      assetModuleFilename: '[name][ext]',
+      chunkFormat: 'module',
     },
-    plugins: [
-        // Copy .wasm files to dist folder
-        new CopyWebpackPlugin({
-            patterns: [
-                {
-                    from: 'node_modules/onnxruntime-web/dist/*.wasm',
-                    to: 'dist/[name][ext]'
-                },
-            ],
-        }),
-    ],
     optimization: {
-        minimize: true,
-        minimizer: [new TerserPlugin({
-            test: /\.min\.js$/,
-            extractComments: false,
-        })],
-    },
-    devServer: {
-        static: {
-            directory: __dirname
-        },
-        port: 8080
+      minimize: true,
+      minimizer: [new TerserPlugin({
+        test: new RegExp(`\\.min\\${suffix}$`),
+        extractComments: false,
+      })],
     },
     experiments: {
-        outputModule: true,
+      outputModule,
+    },
+    resolve: { alias },
+
+    externals: externalModules,
+
+    // Development server
+    devServer: {
+      static: {
+        directory: __dirname,
+      },
+      port: 8080,
     },
-};
+  };
+
+  if (outputModule) {
+    config.module = {
+      parser: {
+        javascript: {
+          importMeta: false
+        }
+      }
+    }
+  } else {
+    config.externalsType = 'commonjs';
+  }
+
+  return config;
+}
+
+// Do not bundle onnxruntime-web when packaging for Node.js.
+// Instead, we use the native library (onnxruntime-node).
+const NODE_IGNORE_MODULES = ["onnxruntime-web", "onnxruntime-web/webgpu"];
+
+// Do not bundle the following modules with webpack (mark as external)
+// NOTE: This is necessary for both type="module" and type="commonjs",
+// and will be ignored when building for web (only used for node/deno)
+const NODE_EXTERNAL_MODULES = ["onnxruntime-node", "sharp", "fs", "path", "url"];
+
+
+export default [
+  // Web-only build
+  buildConfig({
+    type: "module",
+  }),
+
+  // Node-compatible builds
+  buildConfig({
+    suffix: ".mjs",
+    type: "module",
+    ignoreModules: NODE_IGNORE_MODULES,
+    externalModules: NODE_EXTERNAL_MODULES,
+  }),
+  buildConfig({
+    suffix: ".cjs",
+    type: "commonjs",
+    ignoreModules: NODE_IGNORE_MODULES,
+    externalModules: NODE_EXTERNAL_MODULES,
+  }),
+];