Skip to content
This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Commit

Permalink
test(decoder): add unit tests for decoder
Browse files Browse the repository at this point in the history
  • Loading branch information
peakji committed Mar 5, 2023
1 parent 341e1fe commit 8d674c1
Showing 1 changed file with 46 additions and 0 deletions.
46 changes: 46 additions & 0 deletions tests/test_decoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
Test stateful tokenizer for stream decoding.
"""
from transformers import AutoTokenizer

from basaran.decoder import StreamDecoder


class TestDecoder:
    """Round-trip checks for StreamDecoder against local tokenizer fixtures."""

    def test_byte_pair_encoding(self):
        """Decode a Byte-Pair-Encoding token stream one token at a time."""
        bpe_tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path="./tests/data/tiny-random-bloom",
            local_files_only=True,
        )
        stream = StreamDecoder(bpe_tokenizer)

        expected = "hello world ABC \n 你好"
        token_ids = bpe_tokenizer.encode(expected)

        # The decoder is stateful, so tokens are fed strictly in order and
        # the emitted pieces are stitched back together afterwards.
        pieces = [stream.decode(token_id) for token_id in token_ids]
        actual = "".join(pieces)

        assert actual == expected
        assert stream.end == len(expected)

    def test_sentence_piece(self):
        """Decode a SentencePiece token stream one token at a time."""
        sp_tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path="./tests/data/tiny-random-t5",
            local_files_only=True,
        )
        stream = StreamDecoder(sp_tokenizer)

        expected = "hello world ABC"
        token_ids = sp_tokenizer.encode(expected)

        # Same in-order feeding as the BPE case; only the tokenizer differs.
        pieces = [stream.decode(token_id) for token_id in token_ids]
        actual = "".join(pieces)

        assert actual == expected
        assert stream.end == len(expected)

0 comments on commit 8d674c1

Please sign in to comment.