# Multiple speakers
data_source = MyFileDataSource(
    DATA_DIR, speakers=["SF1", "SF2"], max_files=max_files)
X = FileSourceDataset(data_source)
assert len(X) == max_files

# Speaker labels: files are ordered by speaker, so the first half maps to
# SF1 (label 0) and the second half to SF2 (label 1)
Y = data_source.labels
assert np.all(Y[:max_files // 2] == 0)
assert np.all(Y[max_files // 2:] == 1)

# Custom speaker id
data_source = MyFileDataSource(
    DATA_DIR, speakers=["SF1", "SF2"], max_files=max_files,
    labelmap={"SF1": 1, "SF2": 0})
X = FileSourceDataset(data_source)
Y = data_source.labels
assert np.all(Y[:max_files // 2] == 1)
assert np.all(Y[max_files // 2:] == 0)

# Use all data (162 utterances per speaker)
data_source = MyFileDataSource(
    DATA_DIR, speakers=["SF1", "SF2"], max_files=None)
X = FileSourceDataset(data_source)
assert len(X) == 162 * 2
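
# The examples above assume a MyFileDataSource class defined elsewhere. A
# minimal sketch is given below; the base class
# (nnmnkwii.datasets.vcc2016.WavFileDataSource) and the constructor signature
# are assumptions, not taken from the original snippet.
import numpy as np
from scipy.io import wavfile
from nnmnkwii.datasets import FileSourceDataset
from nnmnkwii.datasets import vcc2016


class MyFileDataSource(vcc2016.WavFileDataSource):
    def __init__(self, data_root, speakers, labelmap=None, max_files=None):
        super(MyFileDataSource, self).__init__(
            data_root, speakers, labelmap=labelmap, max_files=max_files)

    def collect_features(self, path):
        # Return the raw waveform; a real data source would typically extract
        # acoustic features here.
        fs, x = wavfile.read(path)
        return x.astype(np.float32)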
# 48 kHz multi-speaker example. The class and __init__ headers below are
# reconstructed from the body; the base class (vctk.WavFileDataSource) is an
# assumption, not taken from the original snippet.
class MyWavFileDataSource(vctk.WavFileDataSource):
    def __init__(self, data_root, speakers, labelmap=None):
        super(MyWavFileDataSource, self).__init__(data_root, speakers, labelmap)
        self.alpha = pysptk.util.mcepalpha(48000)

    def collect_features(self, path):
        fs, x = wavfile.read(path)
        assert fs == 48000
        x = x.astype(np.float64)
        # WORLD analysis: F0 estimation, refinement and spectral envelope
        f0, timeaxis = pyworld.dio(x, fs, frame_period=5)
        f0 = pyworld.stonemask(x, f0, timeaxis, fs)
        spectrogram = pyworld.cheaptrick(x, f0, timeaxis, fs)
        spectrogram = trim_zeros_frames(spectrogram)
        # 25-dimensional mel-cepstrum (order 24 plus the 0th coefficient)
        mc = pysptk.sp2mc(spectrogram, order=24, alpha=self.alpha)
        return mc.astype(np.float32)


data_source = MyWavFileDataSource(DATA_DIR, speakers=["225"])
X = FileSourceDataset(data_source)
print(X[0].shape)  # (T, 25): number of frames x mel-cepstrum dimension
def _get_small_datasets(padded=False, duration=False):
    if duration:
        X, Y = example_file_data_sources_for_duration_model()
    else:
        X, Y = example_file_data_sources_for_acoustic_model()
    if padded:
        X = PaddedFileSourceDataset(X, padded_length=1000)
        Y = PaddedFileSourceDataset(Y, padded_length=1000)
    else:
        X = FileSourceDataset(X)
        Y = FileSourceDataset(Y)
    return X, Y
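
# Illustrative usage (not part of the original snippet): with padded=True,
# every utterance in the returned datasets is zero-padded to 1000 frames,
# so samples can be stacked into a fixed-size array.
X, Y = _get_small_datasets(padded=True)
x, y = X[0], Y[0]
assert x.shape[0] == 1000 and y.shape[0] == 1000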
warn("Data doesn't exist at {}".format(DATA_DIR))
return
class MyTextDataSource(jsut.TranscriptionDataSource):
    def __init__(self, data_root, subsets):
        super(MyTextDataSource, self).__init__(data_root, subsets)

    def collect_features(self, text):
        return text


data_source = MyTextDataSource(DATA_DIR, subsets=["basic5000"])
X1 = FileSourceDataset(data_source)
# "The thing is, we have to buy water from Malaysia."
assert X1[0] == u"水をマレーシアから買わなくてはならないのです。"

data_source = MyTextDataSource(DATA_DIR, subsets=["travel1000"])
X2 = FileSourceDataset(data_source)
# "Your luggage remains in Los Angeles."
assert X2[0] == u"あなたの荷物は、ロサンゼルスに残っています。"

# Multiple subsets
data_source = MyTextDataSource(DATA_DIR, subsets=["basic5000", "travel1000"])
X3 = FileSourceDataset(data_source)
assert X3[0] == u"水をマレーシアから買わなくてはならないのです。"
assert len(X3) == len(X1) + len(X2)

# All subsets
data_source = MyTextDataSource(DATA_DIR, subsets=jsut.available_subsets)
X = FileSourceDataset(data_source)
# As of 2017/11/02 there were 30 missing wav files, so this should be 7696.
assert len(X) == 7696
class MyWavFileDataSource(jsut.WavFileDataSource):
    # The body of this class is not included in the original snippet.
    pass
def __test_wrong_num_args():
    X = FileSourceDataset(WrongNumberOfArgsDataSource())
    # Indexing triggers collect_features, whose signature does not match, so
    # this raises.
    X[0]
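
# A minimal sketch (an assumption, not from the original snippet) of a data
# source whose collect_features signature does not match how FileSourceDataset
# calls it, which is why indexing X[0] above raises.
import numpy as np
from nnmnkwii.datasets import FileDataSource


class WrongNumberOfArgsDataSource(FileDataSource):
    def collect_files(self):
        return ["dummy.wav"]

    def collect_features(self, path, extra_arg_that_should_not_be_here):
        return np.zeros((1, 1), dtype=np.float32)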
def get_data_loaders(data_root, speaker_id, test_shuffle=True):
    data_loaders = {}
    local_conditioning = hparams.cin_channels > 0
    for phase in ["train", "test"]:
        train = phase == "train"
        X = FileSourceDataset(RawAudioDataSource(
            data_root, speaker_id=speaker_id, train=train,
            test_size=hparams.test_size,
            test_num_samples=hparams.test_num_samples,
            random_state=hparams.random_state))
        if local_conditioning:
            Mel = FileSourceDataset(MelSpecDataSource(
                data_root, speaker_id=speaker_id, train=train,
                test_size=hparams.test_size,
                test_num_samples=hparams.test_num_samples,
                random_state=hparams.random_state))
            assert len(X) == len(Mel)
            print("Local conditioning enabled. Shape of a sample: {}.".format(
                Mel[0].shape))
        else:
            Mel = None
        print("[{}]: length of the dataset is {}".format(phase, len(X)))
        if train:
            lengths = np.array(X.file_data_source.lengths)
            # Prepare sampler
            sampler = PartialyRandomizedSimilarTimeLengthSampler(
                lengths, batch_size=hparams.batch_size)
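        # Assumed continuation (not part of the original snippet): wrap the
        # datasets in a DataLoader. PyTorchDataset and collate_fn are assumed
        # to be provided by the surrounding project.
        dataset = PyTorchDataset(X, Mel)
        data_loader = data_utils.DataLoader(
            dataset, batch_size=hparams.batch_size,
            num_workers=hparams.num_workers,
            sampler=sampler if train else None,
            shuffle=test_shuffle if not train else False,
            collate_fn=collate_fn, pin_memory=hparams.pin_memory)
        data_loaders[phase] = data_loader
    return data_loaders
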
if hparams.preset is not None and hparams.preset != "":
    preset = hparams.presets[hparams.preset]
    import json
    hparams.parse_json(json.dumps(preset))
    print("Override hyper parameters with preset \"{}\": {}".format(
        hparams.preset, json.dumps(preset, indent=4)))
_frontend = getattr(frontend, hparams.frontend)
os.makedirs(checkpoint_dir, exist_ok=True)

# Input dataset definitions
dataset_split = 1  # Split number 1 for training
X = FileSourceDataset(TextDataSource(data_root, speaker_id, dataset_split))
Mel = FileSourceDataset(MelSpecDataSource(data_root, speaker_id, dataset_split))
Y = FileSourceDataset(LinearSpecDataSource(data_root, speaker_id, dataset_split))

# Prepare sampler
frame_lengths = Mel.file_data_source.frame_lengths
sampler = PartialyRandomizedSimilarTimeLengthSampler(
    frame_lengths, batch_size=hparams.batch_size)

# Dataset and DataLoader setup
dataset = PyTorchDataset(X, Mel, Y)
data_loader = data_utils.DataLoader(
    dataset, batch_size=hparams.batch_size,
    num_workers=hparams.num_workers, sampler=sampler,
    collate_fn=collate_fn, pin_memory=hparams.pin_memory)
print("dataloader_prepared")
sys.stdout.flush()

# Model
model = build_model()