Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if lang == 'zh' and jieba is None:
raise ModuleNotFoundError(
'''No module named 'jieba'. Install laserembeddings with 'zh' extra to fix that: "pip install laserembeddings[zh]"'''
)
if lang == 'ja' and MeCab is None:
raise ModuleNotFoundError(
'''No module named 'MeCab'. Install laserembeddings with 'ja' extra to fix that: "pip install laserembeddings[ja]"'''
)
self.lang = lang
self.lower_case = lower_case
self.romanize = romanize if romanize is not None else lang == 'el'
self.descape = descape
self.normalizer = MosesPunctNormalizer(lang=lang)
self.tokenizer = MosesTokenizer(lang=lang)
self.mecab_tokenizer = MeCab.Tagger(
"-O wakati -b 50000") if lang == 'ja' else None
def moses_punct_norm(self, text, lang):
    """Normalize ``text`` with a punctuation normalizer for ``lang``.

    Normalizers are stored in ``self.cache_moses_punct_normalizer`` keyed
    by ``lang``; a new ``sm.MosesPunctNormalizer`` is built only when the
    cache has no entry for that language yet.

    Args:
        text: Value handed to the normalizer's ``normalize`` method.
        lang: Cache key, also forwarded as ``lang=`` when a normalizer
            has to be constructed.

    Returns:
        Whatever ``normalize(text)`` returns for the selected normalizer.
    """
    cache = self.cache_moses_punct_normalizer
    # Build lazily on first use so each language pays the setup cost once.
    if lang not in cache:
        cache[lang] = sm.MosesPunctNormalizer(lang=lang)
    return cache[lang].normalize(text)