Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self, arpabet='arpabet', ligatures=False, cedict_file=None):
    """Set up the Flite "wrapper" and the resources it relies on.

    Args:
        arpabet (str): base name (without the ``.csv`` suffix) of the
            packaged data file containing the ARPAbet to IPA mapping
        ligatures (bool): if True, use non-standard ligatures instead of
            standard IPA
        cedict_file (str): path to CC-CEDict dictionary; not used by this
            constructor (included for compatibility)
    """
    # Resolve <package>/data/<arpabet>.csv and load the mapping from it.
    table_name = arpabet + '.csv'
    table_path = pkg_resources.resource_filename(
        __name__, os.path.join('data', table_name)
    )
    self.arpa_map = self._read_arpabet(table_path)

    # Patterns over ASCII letters and apostrophes (straight and curly):
    # chunk_re captures either a run of such characters or a run of
    # anything else, letter_re matches such a run, and regexp matches a
    # single ASCII letter.
    self.chunk_re = re.compile(r"([A-Za-z'’]+|[^A-Za-z'’]+)", re.U)
    self.letter_re = re.compile(r"[A-Za-z'’]+")
    self.regexp = re.compile(r'[A-Za-z]')

    self.ligatures = ligatures
    self.puncnorm = PuncNorm()

    # Keep the panphon feature table and cache how many feature names it has.
    feature_table = panphon.FeatureTable()
    self.ft = feature_table
    self.num_panphon_fts = len(feature_table.names)
preproc (bool): apply preprocessors
postproc (bool): apply postprocessors
ligatures (bool): use precomposed ligatures instead of standard IPA
cedict_filename (str): path to file containing the CC-CEDict
dictionary; relevant only for Chinese
rev (boolean): if True, load reverse transliteration
rev_preproc (bool): if True, apply preprocessor when reverse transliterating
rev_postproc (bool): if True, apply postprocessor when reverse transliterating
"""
# NOTE(review): this is a fragment of a constructor body; the enclosing `def`
# line and the original indentation are not present in this excerpt.
# A code found in self.special is handled by the callable registered under
# that code, which receives only `ligatures` and `cedict_file`.
if code in self.special:
self.epi = self.special[code](ligatures=ligatures, cedict_file=cedict_file)
else:
# Any other code is handled by SimpleEpitran, which receives the
# pre-/post-processing and reverse-transliteration flags instead.
self.epi = SimpleEpitran(code, preproc, postproc, ligatures, rev, rev_preproc, rev_postproc)
# Helper objects kept on the instance next to the selected backend.
self.ft = panphon.featuretable.FeatureTable()
self.xsampa = XSampa()
self.puncnorm = PuncNorm()
"""Constructs the backend object epitran uses for most languages
Args:
code (str): ISO 639-3 code and ISO 15924 code joined with a hyphen
preproc (bool): if True, apply preprocessor
postproc (bool): if True, apply postprocessors
ligatures (bool): if True, use phonetic ligatures for affricates
instead of standard IPA
rev (bool): if True, load reverse transliteration
rev_preproc (bool): if True, apply preprocessor when reverse transliterating
rev_postproc (bool): if True, apply postprocessor when reverse transliterating
"""
# NOTE(review): the `def` line for this constructor and the original
# indentation are not present in this excerpt.
self.rev = rev
# Load the map with the second argument False (the reverse branch below
# passes True) and build the matching regex from the map's keys.
self.g2p = self._load_g2p_map(code, False)
self.regexp = self._construct_regex(self.g2p.keys())
self.puncnorm = PuncNorm()
# Keep the panphon feature table and cache the number of feature names.
self.ft = panphon.FeatureTable()
self.num_panphon_fts = len(self.ft.names)
# 'pre' and 'post' processors for this code; the final False contrasts with
# the True passed to the reverse variants below.
self.preprocessor = PrePostProcessor(code, 'pre', False)
self.postprocessor = PrePostProcessor(code, 'post', False)
self.strip_diacritics = StripDiacritics(code)
# The flags are only stored here; nothing in this fragment acts on them.
self.preproc = preproc
self.postproc = postproc
self.ligatures = ligatures
self.rev_preproc = rev_preproc
self.rev_postproc = rev_postproc
# Reverse-transliteration resources are created when `rev` is truthy.
# NOTE(review): presumably all four rev_* assignments below belong to this
# `if`; the indentation was lost in this excerpt — confirm against the
# original file.
if rev:
self.rev_g2p = self._load_g2p_map(code, True)
self.rev_regexp = self._construct_regex(self.rev_g2p.keys())
self.rev_preprocessor = PrePostProcessor(code, 'pre', True)
self.rev_postprocessor = PrePostProcessor(code, 'post', True)
def __init__(self, lang_script_codes, cedict_file=None):
    """Build a Backoff object from an ordered list of language codes.

    Args:
        lang_script_codes (list): codes for languages to try, listed from
            highest to lowest priority
        cedict_file (str): path to the CC-CEdict dictionary file; it is
            passed to every Epitran instance (necessary only when cmn-Hans
            or cmn-Hant are used)
    """
    # One Epitran instance per code, in the order the codes were given.
    self.langs = [
        _epitran.Epitran(lang_code, cedict_file=cedict_file)
        for lang_code in lang_script_codes
    ]

    # NOTE(review): \p{Number} is Unicode-property syntax that the standard
    # library `re` rejects; presumably `re` is bound to the third-party
    # `regex` module in this file — confirm against the imports.
    self.num_re = re.compile(r'\p{Number}+')

    # Shared helper objects.
    self.ft = panphon.featuretable.FeatureTable()
    self.xsampa = XSampa()
    self.puncnorm = PuncNorm()

    # One diacritic stripper per code, in the same order as self.langs.
    self.dias = [
        StripDiacritics(lang_code) for lang_code in lang_script_codes
    ]