Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@attr("local_only")
def test_ja_jsut():
    """Round-trip every JSUT transcription through the ``jp`` frontend.

    For each value of ``p`` in 0.0, 0.5 and 1.0, each transcription is
    converted with ``text_to_sequence`` and the resulting sequence must end
    with ``eos``. The sequence is then converted back with
    ``sequence_to_text``, and the first ten original/restored pairs are
    printed for eyeballing (the restored text itself is not asserted).

    NOTE(review): the corpus path is hard-coded to one machine, which is
    presumably why the test is tagged ``local_only`` -- confirm.
    """
    jp_frontend = frontend.jp
    from nnmnkwii.datasets import jsut
    from tqdm import trange
    import jaconv  # not referenced below; import kept as in the original

    source = jsut.TranscriptionDataSource(
        "/home/ryuichi/data/jsut_ver1.1/", subsets=jsut.available_subsets)
    transcripts = source.collect_files()

    preview_count = 10
    template = """{0}: {1}\n{0}: {2}\n"""

    for p in (0.0, 0.5, 1.0):
        # trange is used (rather than iterating directly) for its progress bar.
        for idx in trange(len(transcripts)):
            original = transcripts[idx]
            encoded = jp_frontend.text_to_sequence(original, p=p)
            assert encoded[-1] == eos
            decoded = jp_frontend.sequence_to_text(encoded)

            if idx >= preview_count:
                continue
            print(template.format(idx, original, decoded))
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Run ``_process_utterance`` over every JSUT utterance in a process pool.

    Transcriptions and wav paths are collected from ``in_dir`` for all
    ``jsut.available_subsets``, paired positionally, and submitted to a
    ``ProcessPoolExecutor``. Utterance indices passed to the worker start
    at 1.

    Args:
        in_dir: Root directory given to the JSUT transcription and wav
            data sources.
        out_dir: Forwarded to ``_process_utterance`` as its first argument.
        num_workers: Maximum number of worker processes.
        tqdm: Callable that wraps the list of futures before iteration
            (e.g. ``tqdm.tqdm`` for a progress bar). Defaults to identity.

    Returns:
        A list with the result of each ``_process_utterance`` call, in
        submission order.

    Raises:
        Any exception raised inside a worker is re-raised here by
        ``Future.result()``.
    """
    transcriptions = jsut.TranscriptionDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()
    wav_paths = jsut.WavFileDataSource(
        in_dir, subsets=jsut.available_subsets).collect_files()

    # NOTE(review): zip() stops at the shorter sequence, so a mismatch between
    # the number of transcriptions and wav files is silently truncated.
    # The context manager guarantees the pool is shut down (and its worker
    # processes reaped) even when a worker raises; previously it was leaked.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = [
            executor.submit(
                partial(_process_utterance, out_dir, index, wav_path, text))
            for index, (text, wav_path) in enumerate(
                zip(transcriptions, wav_paths), start=1)
        ]
        return [future.result() for future in tqdm(futures)]
def __init__(self, data_root, subsets=None, validate=True):
    """Initialize the data source by delegating to the parent initializer.

    Args:
        data_root: Forwarded unchanged as the parent's first argument.
        subsets: Subset names to use. ``None`` (the default) means
            ``["basic5000"]``.
        validate: Forwarded unchanged as the parent's fourth argument.

    The parent's third positional argument is fixed to ``False``.
    NOTE(review): presumably this flag distinguishes transcription sources
    from wav sources -- confirm against the base class.
    """
    if subsets is None:
        # Build the default per call: a list literal in the signature would
        # be one shared object that the parent could store or mutate.
        subsets = ["basic5000"]
    super(TranscriptionDataSource, self).__init__(
        data_root, subsets, False, validate)