Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_segments(njobs):
# Exercises phonemize() with backend='segments' and language='yucatec',
# once expecting an error and then with strip=True and strip=False.
# one two three four five in Maya Yucatec
text = ['untuʼuleʼ kaʼapʼeʼel', 'oʼoxpʼeʼel', 'kantuʼuloʼon chincho']
# passing use_sampa=True with this backend is expected to raise RuntimeError
with pytest.raises(RuntimeError):
phonemize(
text, language='yucatec', backend='segments',
use_sampa=True, strip=True, njobs=njobs)
# strip=True: the expected utterances carry no trailing space
out = phonemize(
text, language='yucatec', backend='segments',
strip=True, njobs=njobs)
assert out == [
'untṵːlḛ ka̰ːpʼḛːl', 'o̰ːʃpʼḛːl', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo']
# strip=False: each expected utterance ends with one trailing space
out = phonemize(
text, language='yucatec', backend='segments',
strip=False, njobs=njobs)
assert out == [
'untṵːlḛ ka̰ːpʼḛːl ', 'o̰ːʃpʼḛːl ', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo ']
# NOTE(review): the excerpt is cut off in the middle of the next call; its
# arguments and assertion are not visible here.
out = phonemize(
def test_festival_bad():
    """Check that the festival backend rejects the espeak-only options."""
    sentences = ['one two', 'three', 'four five']

    # each entry is one keyword argument that is valid for espeak only
    espeak_only_options = (
        {'use_sampa': True},
        {'with_stress': True},
        {'language_switch': 'remove-flags'},
    )

    for option in espeak_only_options:
        with pytest.raises(RuntimeError):
            phonemize(
                sentences, language='en-us', backend='festival', **option)
def test_festival(njobs):
# Exercises phonemize() with backend='festival' on a list of utterances
# and on the same utterances joined into a single string.
text = ['one two', 'three', 'four five']
# list input, strip=True: one output per utterance, no trailing space
out = phonemize(
text, language='en-us', backend='festival',
strip=True, njobs=njobs)
assert out == ['wahn tuw', 'thriy', 'faor fayv']
# list input, strip=False: every expected utterance ends with a space
out = phonemize(
text, language='en-us', backend='festival',
strip=False, njobs=njobs)
assert out == ['wahn tuw ', 'thriy ', 'faor fayv ']
# single space-joined string: the expected result is a single string
out = phonemize(
' '.join(text), language='en-us', backend='festival',
strip=True, njobs=njobs)
assert out == ' '.join(['wahn tuw', 'thriy', 'faor fayv'])
# with strip=False only the very end of the string gets a trailing space
out = phonemize(
' '.join(text), language='en-us', backend='festival',
strip=False, njobs=njobs)
assert out == ' '.join(['wahn tuw', 'thriy', 'faor fayv '])
# NOTE(review): the excerpt is cut off in the middle of the next call
# (newline-joined input); the rest of the test is not visible here.
out = phonemize(
'\n'.join(text), language='en-us', backend='festival',
def test_espeak(njobs):
# Exercises phonemize() with backend='espeak' on a list of utterances and
# on the same utterances joined into a single string.
text = ['one two', 'three', 'four five']
# list input, strip=True: one output per utterance, no trailing space
out = phonemize(
text, language='en-us', backend='espeak',
strip=True, njobs=njobs)
assert out == ['wʌn tuː', 'θɹiː', 'foːɹ faɪv']
# NOTE(review): indentation was lost in this excerpt; presumably only the
# use_sampa call and its assertion are guarded by this condition - confirm.
if EspeakBackend.is_espeak_ng():
out = phonemize(
text, language='en-us', backend='espeak', use_sampa=True,
strip=True, njobs=njobs)
assert out == ['wVn tu:', 'Tri:', 'fo@ faIv']
# list input, strip=False: every expected utterance ends with a space
out = phonemize(
text, language='en-us', backend='espeak',
strip=False, njobs=njobs)
assert out == ['wʌn tuː ', 'θɹiː ', 'foːɹ faɪv ']
# single space-joined string: the expected result is a single string
out = phonemize(
' '.join(text), language='en-us', backend='espeak',
strip=True, njobs=njobs)
assert out == ' '.join(['wʌn tuː', 'θɹiː', 'foːɹ faɪv'])
# NOTE(review): the excerpt is cut off in the middle of the next call; the
# rest of the test is not visible here.
out = phonemize(
' '.join(text), language='en-us', backend='espeak',
def test_festival_bad():
    """Check that the festival backend rejects the espeak-only options."""
    sentences = ['one two', 'three', 'four five']

    # each entry is one keyword argument that is valid for espeak only
    espeak_only_options = (
        {'use_sampa': True},
        {'with_stress': True},
        {'language_switch': 'remove-flags'},
    )

    for option in espeak_only_options:
        with pytest.raises(RuntimeError):
            phonemize(
                sentences, language='en-us', backend='festival', **option)
# NOTE(review): headerless fragment - no `def` line is visible for these
# statements and `njobs` is not bound within the excerpt; they look like the
# tail of an espeak test. Confirm against the original file.
# list input with use_sampa=True and strip=True
out = phonemize(
text, language='en-us', backend='espeak', use_sampa=True,
strip=True, njobs=njobs)
assert out == ['wVn tu:', 'Tri:', 'fo@ faIv']
# list input, strip=False: every expected utterance ends with a space
out = phonemize(
text, language='en-us', backend='espeak',
strip=False, njobs=njobs)
assert out == ['wʌn tuː ', 'θɹiː ', 'foːɹ faɪv ']
# single space-joined string: the expected result is a single string
out = phonemize(
' '.join(text), language='en-us', backend='espeak',
strip=True, njobs=njobs)
assert out == ' '.join(['wʌn tuː', 'θɹiː', 'foːɹ faɪv'])
# with strip=False only the very end of the string gets a trailing space
out = phonemize(
' '.join(text), language='en-us', backend='espeak',
strip=False, njobs=njobs)
assert out == ' '.join(['wʌn tuː', 'θɹiː', 'foːɹ faɪv '])
# newline-joined string, strip=True: expected lines are joined by '\n'
out = phonemize(
'\n'.join(text), language='en-us', backend='espeak',
strip=True, njobs=njobs)
assert out == '\n'.join(['wʌn tuː', 'θɹiː', 'foːɹ faɪv'])
# newline-joined string, strip=False: every expected line ends with a space
out = phonemize(
'\n'.join(text), language='en-us', backend='espeak',
strip=False, njobs=njobs)
assert out == '\n'.join(['wʌn tuː ', 'θɹiː ', 'foːɹ faɪv '])
def test_festival_bad():
    """Check that the festival backend rejects the espeak-only options."""
    sentences = ['one two', 'three', 'four five']

    # each entry is one keyword argument that is valid for espeak only
    espeak_only_options = (
        {'use_sampa': True},
        {'with_stress': True},
        {'language_switch': 'remove-flags'},
    )

    for option in espeak_only_options:
        with pytest.raises(RuntimeError):
            phonemize(
                sentences, language='en-us', backend='festival', **option)
def test_espeak(njobs):
# Exercises phonemize() with backend='espeak' on a list of utterances and
# on the same utterances joined into a single string.
text = ['one two', 'three', 'four five']
# list input, strip=True: one output per utterance, no trailing space
out = phonemize(
text, language='en-us', backend='espeak',
strip=True, njobs=njobs)
assert out == ['wʌn tuː', 'θɹiː', 'foːɹ faɪv']
# NOTE(review): indentation was lost in this excerpt; presumably only the
# use_sampa call and its assertion are guarded by this condition - confirm.
if EspeakBackend.is_espeak_ng():
out = phonemize(
text, language='en-us', backend='espeak', use_sampa=True,
strip=True, njobs=njobs)
assert out == ['wVn tu:', 'Tri:', 'fo@ faIv']
# list input, strip=False: every expected utterance ends with a space
out = phonemize(
text, language='en-us', backend='espeak',
strip=False, njobs=njobs)
assert out == ['wʌn tuː ', 'θɹiː ', 'foːɹ faɪv ']
# single space-joined string: the expected result is a single string
out = phonemize(
' '.join(text), language='en-us', backend='espeak',
strip=True, njobs=njobs)
assert out == ' '.join(['wʌn tuː', 'θɹiː', 'foːɹ faɪv'])
# with strip=False only the very end of the string gets a trailing space
out = phonemize(
' '.join(text), language='en-us', backend='espeak',
strip=False, njobs=njobs)
assert out == ' '.join(['wʌn tuː', 'θɹiː', 'foːɹ faɪv '])
# NOTE(review): the excerpt is cut off in the middle of the next call
# (newline-joined input); the rest of the test is not visible here.
out = phonemize(
'\n'.join(text), language='en-us', backend='espeak',
# configure the separator for phonemes, syllables and words.
# NOTE(review): excerpt from the body of a larger function; `args`, `log`,
# `streamin` and `streamout` are defined outside the visible lines.
sep = separator.Separator(
phone=args.phone_separator,
syllable=args.syllable_separator,
word=args.word_separator)
log.debug('separator is %s', sep)
# load the input text (python2 optionally needs an extra decode)
text = streamin.read()
try:
text = text.decode('utf8')
except (AttributeError, UnicodeEncodeError):
# the text read from the stream has no decode() method, or decoding it
# failed with UnicodeEncodeError: keep the text exactly as it was read
pass
# phonemize the input text
out = phonemize.phonemize(
text,
language=args.language,
backend=args.backend,
separator=sep,
strip=args.strip,
with_stress=args.with_stress,
use_sampa=args.sampa,
language_switch=args.language_switch,
njobs=args.njobs,
logger=log)
# write the result followed by a newline, but only when it is non-empty
if len(out):
streamout.write(out + '\n')
def text2phone(text, language):
    '''
    Convert graphemes to phonemes.

    The text is phonemized with the espeak backend, then the punctuation
    found in the input (matches of PHONEME_PUNCTUATION_PATTERN) is put back
    into the phoneme string in place of the '| |\\n' markers of the
    phonemizer output.

    Args:
        text: input string to convert.
        language: language code forwarded to phonemize().

    Returns:
        The phoneme string with the input punctuation re-inserted.
    '''
    sep = phonemizer.separator.Separator(' |', '', '|')
    punctuations = re.findall(PHONEME_PUNCTUATION_PATTERN, text)
    ph = phonemize(text, separator=sep, strip=False, njobs=1,
                   backend='espeak', language=language)
    ph = ph[:-1].strip()  # skip the last empty character

    if not punctuations:
        return ph

    # When the text ends with the last punctuation match, that match has no
    # line break to replace: it is appended to the end of the output instead.
    ends_with_punct = text[-1] == punctuations[-1]
    inner = punctuations[:-1] if ends_with_punct else punctuations

    # Replace \n with matching punctuations, one occurrence per match and in
    # order of appearance.
    for punct in inner:
        ph = ph.replace('| |\n', '|' + punct + '| |', 1)

    if ends_with_punct:
        # punctuations[-1] equals text[-1] here, so it is a plain one
        # character string and the concatenation needs no error handling.
        ph = ph + punctuations[-1]
    return ph