Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self,
             smooth_method="floor", smooth_value=0.01,
             lowercase=False, use_effective_order=True,
             lang="en"):
    """Store the configuration and select a tokenizer for ``lang``.

    Args:
        smooth_method: Stored unchanged on ``self.smooth_method``.
        smooth_value: Stored unchanged on ``self.smooth_value``.
        lowercase: Stored unchanged on ``self.lowercase``.
        use_effective_order: Stored unchanged on
            ``self.use_effective_order``.
        lang: Either a language code string, which is resolved through
            ``get_lang``, or an already constructed ``BaseLang`` instance.

    Raises:
        TypeError: If ``lang`` is neither a ``str`` nor a ``BaseLang``.
    """
    self.smooth_method = smooth_method
    self.smooth_value = smooth_value
    self.lowercase = lowercase
    self.use_effective_order = use_effective_order

    if isinstance(lang, str):
        self.lang = lang
        self._lang = get_lang(lang)
    elif isinstance(lang, BaseLang):
        self.lang = lang.lang
        self._lang = lang
    else:
        # Without this branch ``self.lang`` would stay unset and the
        # comparison below would fail with an AttributeError that does
        # not point at the real cause.
        raise TypeError(
            "lang must be a str or a BaseLang instance, got "
            f"{type(lang).__name__}"
        )

    self._tokenizer = DEFAULT_TOKENIZER
    if self.lang == "ja":
        def tokenizer_ja(text):
            # Tokenize with the language object, then re-join with single
            # spaces so the result is one whitespace-delimited string.
            words = self._lang.tokenize_with_preprocess(text)
            return " ".join(words)

        # NOTE: this writes to the shared TOKENIZERS mapping, so the entry
        # is replaced on every construction with lang "ja" and the stored
        # closure keeps a reference to this instance.
        TOKENIZERS["ja"] = tokenizer_ja
        self._tokenizer = "ja"
    elif self.lang == "zh":
        self._tokenizer = "zh"
def __init__(
    self,
    smooth="floor",
    smooth_floor=0.01,
    lowercase=False,
    use_effective_order=True,
    tokenizer=DEFAULT_TOKENIZER,
):
    """Record the given options as instance attributes.

    Every argument is stored unchanged under an attribute of the same
    name; no validation or conversion is performed here.

    Args:
        smooth: Stored on ``self.smooth``.
        smooth_floor: Stored on ``self.smooth_floor``.
        lowercase: Stored on ``self.lowercase``.
        use_effective_order: Stored on ``self.use_effective_order``.
        tokenizer: Stored on ``self.tokenizer``; defaults to
            ``DEFAULT_TOKENIZER``.
    """
    # Smoothing settings.
    self.smooth, self.smooth_floor = smooth, smooth_floor
    # Remaining options, assigned in the same order as the signature.
    self.lowercase, self.use_effective_order = lowercase, use_effective_order
    self.tokenizer = tokenizer