Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from epitran.epihan import Epihan, EpihanTraditional
from epitran.flite import FliteLexLookup
from epitran.puncnorm import PuncNorm
from epitran.simple import SimpleEpitran
from epitran.xsampa import XSampa
# Compatibility shim: on a Python 3 interpreter, bind the name ``unicode``
# to an identity function so that calls to ``unicode(...)`` elsewhere in
# the module return their argument unchanged.
if sys.version_info.major == 3:
    def unicode(x):
        """Return ``x`` unchanged."""
        return x

# Suppress every log record at DEBUG severity and below, process-wide.
logging.disable(logging.DEBUG)
class Epitran(object):
"""Unified interface for IPA transliteration/transcription"""
special = {'eng-Latn': FliteLexLookup,
'cmn-Hans': Epihan,
'cmn-Hant': EpihanTraditional}
def __init__(self, code, preproc=True, postproc=True, ligatures=False, cedict_file=None,
rev=False, rev_preproc=True, rev_postproc=True):
"""Construct Epitran transliteration/transcription object
Args:
code (str): ISO 639-3 plus "-" plus ISO 15924 code of the
language/script pair that should be loaded
preproc (bool): apply preprocessors
postproc (bool): apply postprocessors
ligatures (bool): use precomposed ligatures instead of standard IPA
cedict_file (str): path to file containing the CC-CEDict
dictionary; relevant only for Chinese
rev (boolean): if True, load reverse transliteration
rev_preproc (bool): if True, apply preprocessor when reverse transliterating
pinyin = u''.join(pinyin).lower()
ipa = self.rules.apply(pinyin)
ipa_tokens.append(ipa.replace(u',', u''))
else:
if normpunc:
token = self.normalize_punc(token)
ipa_tokens.append(token)
ipa_tokens = map(ligaturize, ipa_tokens)\
if ligatures else ipa_tokens
return u''.join(ipa_tokens)
def strict_trans(self, text, normpunc=False, ligatures=False):
    """Transliterate ``text`` by delegating to ``self.transliterate``.

    Args:
        text: input passed through unchanged as the first argument
        normpunc (bool): passed through as the second positional argument
        ligatures (bool): passed through as the third positional argument

    Returns:
        Whatever ``self.transliterate`` returns for these arguments.
    """
    forwarded = (text, normpunc, ligatures)
    return self.transliterate(*forwarded)
class EpihanTraditional(Epihan):
    """Epihan variant whose constructor loads CC-CEDict with ``traditional=True``."""

    def __init__(self, ligatures=False, cedict_file=None, rules_file='pinyin-to-ipa.txt'):
        """Set up the dictionary, rule set and character pattern.

        Args:
            ligatures (bool): accepted for interface compatibility; it is
                not read anywhere inside this constructor
            cedict_file (str): path to the CC-CEDict dictionary file
            rules_file (str): file name of the pinyin-to-IPA rules, looked
                up under ``data/rules`` among this package's resources

        Raises:
            MissingData: if ``cedict_file`` is falsy (``None`` or empty)
        """
        # Guard clause: nothing else is loaded without a dictionary path.
        if not cedict_file:
            raise MissingData('Please specify a location for the CC-CEDict file.')

        # Turn the bare rules file name into a path inside the package data.
        packaged_rules = pkg_resources.resource_filename(
            __name__, os.path.join('data', 'rules', rules_file))

        self.cedict = cedict.CEDictTrie(cedict_file, traditional=True)
        self.rules = rules.Rules([packaged_rules])
        # NOTE(review): ``\p{Han}`` is not understood by the stdlib ``re``
        # module, so ``re`` is presumably bound to the third-party ``regex``
        # package in this file -- confirm against the import block.
        self.regexp = re.compile(r'\p{Han}')