Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_english():
    """Check the espeak 'en-us' phonemization of a four-line text."""
    espeak = EspeakBackend('en-us')
    lines = u'hello world\ngoodbye\nthird line\nyet another'
    expected = u'həloʊ wɜːld\nɡʊdbaɪ\nθɜːd laɪn\njɛt ɐnʌðɚ'

    phonemized = espeak._phonemize_aux(
        lines, separator.default_separator, True)

    # the backend output is joined with newlines before being compared
    assert '\n'.join(phonemized) == expected
def test_separator():
    """Check the 'cree' segments backend output with and without strip."""
    cree = SegmentsBackend('cree')
    utterance = 'achi acho'
    default = separator.default_separator

    # without strip, the expected output ends with a trailing space
    unstripped = cree.phonemize(utterance, separator=default)
    assert unstripped == u'ʌtʃɪ ʌtʃʊ '

    # with strip=True, the trailing space is expected to be gone
    stripped = cree.phonemize(utterance, separator=default, strip=True)
    assert stripped == u'ʌtʃɪ ʌtʃʊ'
def test_str():
    """Check the string representation of a custom and the default separator."""
    custom = Separator(word='w', syllable='s', phone='p')
    expected_custom = '(phone: "p", syllable: "s", word: "w")'
    expected_default = '(phone: "", syllable: "", word: " ")'

    assert str(custom) == expected_custom
    assert str(default_separator) == expected_default
def test_prop():
    """Check that phone, syllable and word are read only on the default separator."""
    for attribute in ('phone', 'syllable', 'word'):
        # assigning to any of the three attributes must raise
        with pytest.raises(AttributeError):
            setattr(default_separator, attribute, 'a')
def test_stress():
    """Check espeak 'en-us' output with the with_stress option off and on."""
    text = u'hello world'
    # (with_stress flag, expected phonemization of the first output item)
    cases = (
        (False, u'həloʊ wɜːld'),
        (True, u'həlˈoʊ wˈɜːld'),
    )

    for with_stress, expected in cases:
        backend = EspeakBackend('en-us', with_stress=with_stress)
        output = backend._phonemize_aux(
            text, separator.default_separator, True)
        assert output[0] == expected
def phonemize(text, language='en-us', separator=default_separator,
strip=False, logger=None):
"""Return a phonemized version of `text` with espeak
As espeak doesn't support multiline input, we must run a separate
espeak process for each line of the `text`.
"""
assert language in supported_languages()
# old espeak versions don't support --sep
version = espeak_version_short()
if logger:
logger.debug('espeak version is: {}'.format(version))
sep = '--sep=_'
if version == '1.48.03' or int(version.split('.')[1]) <= 47:
help='number of parallel jobs, default is %(default)s.')
# input/output arguments
group = parser.add_argument_group('input/output')
group.add_argument(
'input', default=sys.stdin, nargs='?', metavar='',
help='input text file to phonemize, if not specified read from stdin.')
group.add_argument(
'-o', '--output', default=sys.stdout, metavar='',
help='output text file to write, if not specified write to stdout.')
group = parser.add_argument_group('separators')
group.add_argument(
'-p', '--phone-separator', metavar='',
default=separator.default_separator.phone,
help='phone separator, default is "%(default)s".')
group.add_argument(
'-w', '--word-separator', metavar='',
default=separator.default_separator.word,
help='word separator, default is "%(default)s".')
group.add_argument(
'-s', '--syllable-separator', metavar='',
default=separator.default_separator.syllable,
help='''syllable separator, only valid for festival backend,
this option has no effect if espeak or segments is used.
Default is "%(default)s".''')
group.add_argument(
'--strip', action='store_true',
def phonemize(text, language='en-us', backend='festival',
separator=default_separator, strip=False,
with_stress=False, use_sampa=False,
language_switch='keep-flags',
njobs=1, logger=get_logger()):
"""Multilingual text to phonemes converter
Return a phonemized version of an input `text`, given its
`language` and a phonemization `backend`.
Parameters
----------
text (str or list of str): The text to be phonemized. Any empty
line will be ignored. If `text` is an str, it can be multiline
(lines being separated by \n). If `text` is a list, each
element is considered as a separated line. Each line is
considered as a text utterance.
def phonemize(text, language='en-us', separator=default_separator,
strip=False, logger=None):
"""Return a phonemized version of `text` with festival
This function is a wrapper on festival, a text to speech program,
allowing simple phonemization of some English text. The US
phoneset we use is the default one in festival, as described at
http://www.festvox.org/bsv/c4711.html
Any opening and closing parentheses in `text` are removed, as they
interfere with the Scheme expression syntax. Moreover, double quotes
are replaced by single quotes because double quotes denote
utterance boundaries in festival.
Parsing an ill-formed Scheme expression during post-processing
(typically with unbalanced parentheses) raises an IndexError.
'input', default=sys.stdin, nargs='?', metavar='',
help='input text file to phonemize, if not specified read from stdin.')
group.add_argument(
'-o', '--output', default=sys.stdout, metavar='',
help='output text file to write, if not specified write to stdout.')
group = parser.add_argument_group('separators')
group.add_argument(
'-p', '--phone-separator', metavar='',
default=separator.default_separator.phone,
help='phone separator, default is "%(default)s".')
group.add_argument(
'-w', '--word-separator', metavar='',
default=separator.default_separator.word,
help='word separator, default is "%(default)s".')
group.add_argument(
'-s', '--syllable-separator', metavar='',
default=separator.default_separator.syllable,
help='''syllable separator, only valid for festival backend,
this option has no effect if espeak or segments is used.
Default is "%(default)s".''')
group.add_argument(
'--strip', action='store_true',
help='removes the end separators in phonemized tokens.')
group = parser.add_argument_group('backends')
group.add_argument(
'-b', '--backend', metavar='', default='espeak',