Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
.format(self.g2p[source]))
target = source
except IndexError:
logging.debug("self.g2p[source]={}".format(self.g2p[source]))
target = source
tr_list.append((target, True))
text = text[len(source):]
else:
tr_list.append((text[0], False))
self.nils[text[0]] += 2
text = text[1:]
text = ''.join([s for (s, _) in filter(filter_func, tr_list)])
if self.postproc:
text = self.postprocessor.process(text)
if ligatures or self.ligatures:
text = ligaturize(text)
if normpunc:
text = self.puncnorm.norm(text)
return unicodedata.normalize('NFC', text)
Args:
text (unicode): English text
normpunc (bool): if True, normalize punctuation downward
ligatures (bool): if True, use non-standard ligatures instead of
standard IPA
"""
text = unicodedata.normalize('NFC', text)
acc = []
for chunk in self.chunk_re.findall(text):
if self.letter_re.match(chunk):
acc.append(self.english_g2p(chunk))
else:
acc.append(chunk)
text = ''.join(acc)
text = self.puncnorm.norm(text) if normpunc else text
text = ligaturize(text) if (ligatures or self.ligatures) else text
return text
Returns:
unicode: IPA string
"""
tokens = self.cedict.tokenize(text)
ipa_tokens = []
for token in tokens:
if token in self.cedict.hanzi:
(pinyin, _) = self.cedict.hanzi[token]
pinyin = u''.join(pinyin).lower()
ipa = self.rules.apply(pinyin)
ipa_tokens.append(ipa.replace(u',', u''))
else:
if normpunc:
token = self.normalize_punc(token)
ipa_tokens.append(token)
ipa_tokens = map(ligaturize, ipa_tokens)\
if ligatures else ipa_tokens
return u''.join(ipa_tokens)