Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self, lang_script_codes, cedict_file=None):
    """Set up one transliterator per language plus the shared helpers.

    Args:
        lang_script_codes (list): language-script codes to try, ordered
            from highest to lowest priority
        cedict_file (str): path to the CC-CEdict dictionary file; it is
            forwarded unchanged to every Epitran instance (needed only
            when cmn-Hans or cmn-Hant are among the codes)
    """
    def make_transliterator(code):
        # Every language gets the same dictionary path.
        return _epitran.Epitran(code, cedict_file=cedict_file)

    # Transliterators are kept in the priority order supplied by the caller.
    self.langs = list(map(make_transliterator, lang_script_codes))

    # Pattern matching one or more characters with the Unicode Number property.
    # NOTE(review): the stdlib `re` module rejects `\p{...}`; presumably
    # `re` is the third-party `regex` module here -- confirm against the
    # file's imports.
    number_pattern = r'\p{Number}+'
    self.num_re = re.compile(number_pattern)

    self.ft = panphon.featuretable.FeatureTable()
    self.xsampa = XSampa()
    self.puncnorm = PuncNorm()

    # One diacritic stripper per code, in the same order as `self.langs`.
    self.dias = list(map(StripDiacritics, lang_script_codes))
postproc (bool): if True, apply postprocessors
ligatures (bool): if True, use phonetic ligatures for affricates
instead of standard IPA
rev (bool): if True, load reverse transliteration
rev_preproc (bool): if True, apply preprocessor when reverse transliterating
rev_postproc (bool): if True, apply postprocessor when reverse transliterating
"""
self.rev = rev
self.g2p = self._load_g2p_map(code, False)
self.regexp = self._construct_regex(self.g2p.keys())
self.puncnorm = PuncNorm()
self.ft = panphon.FeatureTable()
self.num_panphon_fts = len(self.ft.names)
self.preprocessor = PrePostProcessor(code, 'pre', False)
self.postprocessor = PrePostProcessor(code, 'post', False)
self.strip_diacritics = StripDiacritics(code)
self.preproc = preproc
self.postproc = postproc
self.ligatures = ligatures
self.rev_preproc = rev_preproc
self.rev_postproc = rev_postproc
if rev:
self.rev_g2p = self._load_g2p_map(code, True)
self.rev_regexp = self._construct_regex(self.rev_g2p.keys())
self.rev_preprocessor = PrePostProcessor(code, 'pre', True)
self.rev_postprocessor = PrePostProcessor(code, 'post', True)
self.nils = defaultdict(int)