Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
language/script pair that should be loaded
preproc (bool): apply preprocessors
postproc (bool): apply postprocessors
ligatures (bool): use precomposed ligatures instead of standard IPA
cedict_file (str): path to file containing the CC-CEDict
dictionary; relevant only for Chinese
rev (bool): if True, load reverse transliteration
rev_preproc (bool): if True, apply preprocessor when reverse transliterating
rev_postproc (bool): if True, apply postprocessor when reverse transliterating
"""
if code in self.special:
self.epi = self.special[code](ligatures=ligatures, cedict_file=cedict_file)
else:
self.epi = SimpleEpitran(code, preproc, postproc, ligatures, rev, rev_preproc, rev_postproc)
self.ft = panphon.featuretable.FeatureTable()
self.xsampa = XSampa()
self.puncnorm = PuncNorm()
def __init__(self, lang_script_codes, cedict_file=None):
    """Set up a Backoff object over several language/script pairs.

    Args:
        lang_script_codes (list): language-script codes to try, ordered
            from highest to lowest priority
        cedict_file (str): path to the CC-CEdict dictionary file; it is
            forwarded to every Epitran instance (necessary only when
            cmn-Hans or cmn-Hant are used)
    """
    # One Epitran instance per code, kept in the caller's priority order.
    transliterators = []
    for code in lang_script_codes:
        transliterators.append(
            _epitran.Epitran(code, cedict_file=cedict_file)
        )
    self.langs = transliterators

    # NOTE(review): `\p{Number}` is a Unicode-property escape that the
    # standard-library `re` rejects; presumably `re` is bound to the
    # third-party `regex` module in this file -- confirm at the imports.
    number_pattern = re.compile(r'\p{Number}+')
    self.num_re = number_pattern

    self.ft = panphon.featuretable.FeatureTable()
    self.xsampa = XSampa()
    self.puncnorm = PuncNorm()

    # One StripDiacritics per code, parallel to self.langs.
    self.dias = list(map(StripDiacritics, lang_script_codes))