Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _setup_tokenizer(self) -> None:
if self._tokenizer == "character":
self.tokenizer = word_tokenizers.CharacterTokenizer()
if self._tokenizer == "whitespace":
self.tokenizer = word_tokenizers.WhitespaceTokenizer()
if self._tokenizer == "kytea":
self.tokenizer = word_tokenizers.KyTeaTokenizer(
with_postag=self.with_postag,
model_path=self.model_path,
)
if self._tokenizer == "sentencepiece":
if self.model_path is None:
raise ValueError("`model_path` must be specified for sentencepiece.")
self.tokenizer = word_tokenizers.SentencepieceTokenizer(
model_path=self.model_path,
)
if self._tokenizer == "mecab":
self.tokenizer = word_tokenizers.MeCabTokenizer(
user_dictionary_path=self.user_dictionary_path,
system_dictionary_path=self.system_dictionary_path,