Skip to content

Commit

Permalink
GH-1711: Mark old tokenization functions as deprecated
Browse files Browse the repository at this point in the history
  • Loading branch information
Mario Sänger committed Jun 25, 2020
1 parent aa2830d commit eb8b358
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions flair/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from collections import Counter
from collections import defaultdict

from deprecated import deprecated

from segtok.segmenter import split_single
from segtok.tokenizer import split_contractions
from segtok.tokenizer import word_tokenizer
Expand Down Expand Up @@ -730,18 +732,21 @@ def name(self) -> str:
return self.__class__.__name__ + "_" + self.tokenizer_func.__name__


@deprecated(version="0.5", reason="Use 'flair.data.SpaceTokenizer' instead.")
def space_tokenizer(text: str) -> List[Token]:
    """Deprecated: split *text* into tokens on whitespace.

    Delegates straight to the static ``SpaceTokenizer.run_tokenize`` so
    that no ``SpaceTokenizer`` instance is allocated on every call.
    """
    return SpaceTokenizer.run_tokenize(text)


@deprecated(version="0.5", reason="Use 'flair.data.SegTokTokenizer' instead.")
def segtok_tokenizer(text: str) -> List[Token]:
    """Deprecated: tokenize *text* with the segtok library.

    Delegates straight to the static ``SegTokTokenizer.run_tokenize`` so
    that no ``SegTokTokenizer`` instance is allocated on every call.
    """
    return SegTokTokenizer.run_tokenize(text)


@deprecated(version="0.5", reason="Use 'flair.data.SpacyTokenizer' instead.")
def build_spacy_tokenizer(model) -> Callable[[str], List[Token]]:
spacy_tokenizer = SpacyTokenizer(model)

Expand All @@ -751,6 +756,7 @@ def tokenizer(text: str) -> List[Token]:
return tokenizer


@deprecated(version="0.5", reason="Use 'flair.data.JapaneseTokenizer' instead.")
def build_japanese_tokenizer(tokenizer: str = "MeCab"):
japanese_tokenizer = JapaneseTokenizer(tokenizer)

Expand Down

0 comments on commit eb8b358

Please sign in to comment.