How to use the nnmnkwii.datasets.FileSourceDataset function in nnmnkwii

To help you get started, we’ve selected a few nnmnkwii examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github r9y9 / nnmnkwii / tests / test_real_datasets.py View on Github external
# Multi speakers
    data_source = MyFileDataSource(
        DATA_DIR, speakers=["SF1", "SF2"], max_files=max_files)
    X = FileSourceDataset(data_source)
    assert len(X) == max_files

    # Speaker labels
    Y = data_source.labels
    assert np.all(Y[:max_files // 2] == 0)
    assert np.all(Y[max_files // 2:] == 1)

    # Custom speaker id
    data_source = MyFileDataSource(
        DATA_DIR, speakers=["SF1", "SF2"], max_files=max_files,
        labelmap={"SF1": 1, "SF2": 0})
    X = FileSourceDataset(data_source)
    Y = data_source.labels
    assert np.all(Y[:max_files // 2] == 1)
    assert np.all(Y[max_files // 2:] == 0)

    # Use all data
    data_source = MyFileDataSource(
        DATA_DIR, speakers=["SF1", "SF2"], max_files=None)
    X = FileSourceDataset(data_source)
    assert len(X) == 162 * 2
github r9y9 / nnmnkwii / tests / test_real_datasets.py View on Github external
super(MyWavFileDataSource, self).__init__(data_root, speakers, labelmap)
            self.alpha = pysptk.util.mcepalpha(48000)

        def collect_features(self, path):
            """Compute mel-cepstrum features for the wav file at ``path``.

            The file is read with ``wavfile.read`` and must be sampled at
            48000 Hz. F0 is estimated with ``pyworld.dio`` (5 ms frame
            period) and refined with ``pyworld.stonemask``; the
            ``pyworld.cheaptrick`` spectrogram is passed through
            ``trim_zeros_frames`` and converted with ``pysptk.sp2mc``
            (order 24, ``self.alpha``).

            Returns:
                The mel-cepstrum array cast to ``np.float32``.
            """
            sample_rate, waveform = wavfile.read(path)
            assert sample_rate == 48000
            # The analysis below is run on a float64 copy of the samples.
            waveform = waveform.astype(np.float64)
            coarse_f0, frame_times = pyworld.dio(
                waveform, sample_rate, frame_period=5)
            refined_f0 = pyworld.stonemask(
                waveform, coarse_f0, frame_times, sample_rate)
            envelope = trim_zeros_frames(
                pyworld.cheaptrick(waveform, refined_f0, frame_times, sample_rate))
            mel_cepstrum = pysptk.sp2mc(envelope, order=24, alpha=self.alpha)
            return mel_cepstrum.astype(np.float32)

    data_source = MyWavFileDataSource(DATA_DIR, speakers=["225"])
    X = FileSourceDataset(data_source)
    print(X[0].shape)
github r9y9 / nnmnkwii / tests / test_pack_pad_sequence.py View on Github external
def _get_small_datasets(padded=False, duration=False):
    """Build a small ``(X, Y)`` dataset pair from the example data sources.

    Args:
        padded: When true, wrap both sources in ``PaddedFileSourceDataset``
            with ``padded_length=1000``; otherwise use ``FileSourceDataset``.
        duration: When true, take the sources from
            ``example_file_data_sources_for_duration_model``; otherwise from
            ``example_file_data_sources_for_acoustic_model``.

    Returns:
        A tuple ``(X, Y)`` of the two wrapped datasets.
    """
    # Only the selected factory name is evaluated, as in an if/else.
    make_sources = (example_file_data_sources_for_duration_model
                    if duration
                    else example_file_data_sources_for_acoustic_model)
    x_source, y_source = make_sources()

    if not padded:
        return FileSourceDataset(x_source), FileSourceDataset(y_source)

    return (PaddedFileSourceDataset(x_source, padded_length=1000),
            PaddedFileSourceDataset(y_source, padded_length=1000))
github r9y9 / nnmnkwii / tests / test_real_datasets.py View on Github external
warn("Data doesn't exist at {}".format(DATA_DIR))
        return

    class MyTextDataSource(jsut.TranscriptionDataSource):
        """Transcription data source whose features are the text itself."""

        def __init__(self, data_root, subsets):
            # Forward both arguments to the base class unchanged.
            super(MyTextDataSource, self).__init__(data_root, subsets)

        def collect_features(self, text):
            """Return ``text`` as-is; no transformation is applied."""
            return text

    data_source = MyTextDataSource(DATA_DIR, subsets=["basic5000"])
    X1 = FileSourceDataset(data_source)
    assert X1[0] == u"水をマレーシアから買わなくてはならないのです。"

    data_source = MyTextDataSource(DATA_DIR, subsets=["travel1000"])
    X2 = FileSourceDataset(data_source)
    assert X2[0] == u"あなたの荷物は、ロサンゼルスに残っています。"

    # Multiple subsets
    data_source = MyTextDataSource(DATA_DIR, subsets=["basic5000", "travel1000"])
    X3 = FileSourceDataset(data_source)
    assert X3[0] == u"水をマレーシアから買わなくてはならないのです。"
    assert len(X3) == len(X1) + len(X2)

    # All subsets
    data_source = MyTextDataSource(DATA_DIR, subsets=jsut.available_subsets)
    X = FileSourceDataset(data_source)
    # As of 2017/11/2. There were 30 missing wav files.
    # This should be 7696
    assert len(X) == 7696

    class MyWavFileDataSource(jsut.WavFileDataSource):
github r9y9 / nnmnkwii / tests / test_datasets.py View on Github external
def __test_wrong_num_args():
        """Wrap ``WrongNumberOfArgsDataSource`` in a dataset and read item 0.

        NOTE(review): presumably the indexing is expected to raise; the
        enclosing test that would assert this is not visible here - confirm.
        """
        X = FileSourceDataset(WrongNumberOfArgsDataSource())
        X[0]
github HaiFengZeng / clari_wavenet_vocoder / train.py View on Github external
def get_data_loaders(data_root, speaker_id, test_shuffle=True):
    data_loaders = {}
    local_conditioning = hparams.cin_channels > 0
    for phase in ["train", "test"]:
        train = phase == "train"
        X = FileSourceDataset(RawAudioDataSource(data_root, speaker_id=speaker_id,
                                                 train=train,
                                                 test_size=hparams.test_size,
                                                 test_num_samples=hparams.test_num_samples,
                                                 random_state=hparams.random_state))
        if local_conditioning:
            Mel = FileSourceDataset(MelSpecDataSource(data_root, speaker_id=speaker_id,
                                                      train=train,
                                                      test_size=hparams.test_size,
                                                      test_num_samples=hparams.test_num_samples,
                                                      random_state=hparams.random_state))
            assert len(X) == len(Mel)
            print("Local conditioning enabled. Shape of a sample: {}.".format(
                Mel[0].shape))
        else:
            Mel = None
        print("[{}]: length of the dataset is {}".format(phase, len(X)))

        if train:
            lengths = np.array(X.file_data_source.lengths)
            # Prepare sampler
            sampler = PartialyRandomizedSimilarTimeLengthSampler(
                lengths, batch_size=hparams.batch_size)
github SforAiDl / Neural-Voice-Cloning-With-Few-Samples / speaker_adaptatation-libri.py View on Github external
if hparams.preset is not None and hparams.preset != "":
        preset = hparams.presets[hparams.preset]
        import json
        hparams.parse_json(json.dumps(preset))
        print("Override hyper parameters with preset \"{}\": {}".format(
            hparams.preset, json.dumps(preset, indent=4)))

    _frontend = getattr(frontend, hparams.frontend)

    os.makedirs(checkpoint_dir, exist_ok=True)

    # Input dataset definitions
    dataset_split = 1 #Split number 1 for training
    X = FileSourceDataset(TextDataSource(data_root, speaker_id,dataset_split))
    Mel = FileSourceDataset(MelSpecDataSource(data_root, speaker_id,dataset_split))
    Y = FileSourceDataset(LinearSpecDataSource(data_root, speaker_id,dataset_split))

    # Prepare sampler
    frame_lengths = Mel.file_data_source.frame_lengths
    sampler = PartialyRandomizedSimilarTimeLengthSampler(
        frame_lengths, batch_size=hparams.batch_size)

    # Dataset and Dataloader setup
    dataset = PyTorchDataset(X, Mel, Y)
    data_loader = data_utils.DataLoader(
        dataset, batch_size=hparams.batch_size,
        num_workers=hparams.num_workers, sampler=sampler,
        collate_fn=collate_fn, pin_memory=hparams.pin_memory)
    print("dataloader_prepared")
    sys.stdout.flush()
    # Model
    model = build_model()
github HaiFengZeng / clari_wavenet_vocoder / train_student.py View on Github external
def get_data_loaders(data_root, speaker_id, test_shuffle=True):
    data_loaders = {}
    local_conditioning = hparams.cin_channels > 0
    for phase in ["train", "test"]:
        train = phase == "train"
        X = FileSourceDataset(RawAudioDataSource(data_root, speaker_id=speaker_id,
                                                 train=train,
                                                 test_size=hparams.test_size,
                                                 test_num_samples=hparams.test_num_samples,
                                                 random_state=hparams.random_state))
        if local_conditioning:
            Mel = FileSourceDataset(MelSpecDataSource(data_root, speaker_id=speaker_id,
                                                      train=train,
                                                      test_size=hparams.test_size,
                                                      test_num_samples=hparams.test_num_samples,
                                                      random_state=hparams.random_state))
            assert len(X) == len(Mel)
            print("Local conditioning enabled. Shape of a sample: {}.".format(
                Mel[0].shape))
        else:
            Mel = None
        print("[{}]: length of the dataset is {}".format(phase, len(X)))
github HaiFengZeng / clari_wavenet_vocoder / train.py View on Github external
def get_data_loaders(data_root, speaker_id, test_shuffle=True):
    data_loaders = {}
    local_conditioning = hparams.cin_channels > 0
    for phase in ["train", "test"]:
        train = phase == "train"
        X = FileSourceDataset(RawAudioDataSource(data_root, speaker_id=speaker_id,
                                                 train=train,
                                                 test_size=hparams.test_size,
                                                 test_num_samples=hparams.test_num_samples,
                                                 random_state=hparams.random_state))
        if local_conditioning:
            Mel = FileSourceDataset(MelSpecDataSource(data_root, speaker_id=speaker_id,
                                                      train=train,
                                                      test_size=hparams.test_size,
                                                      test_num_samples=hparams.test_num_samples,
                                                      random_state=hparams.random_state))
            assert len(X) == len(Mel)
            print("Local conditioning enabled. Shape of a sample: {}.".format(
                Mel[0].shape))
        else:
            Mel = None
        print("[{}]: length of the dataset is {}".format(phase, len(X)))