How to use the torchaudio.transforms.MuLawEncoding function in torchaudio

To help you get started, we’ve selected a few torchaudio examples based on popular ways MuLawEncoding is used in public projects.

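Before the project examples below, here is a minimal, self-contained sketch of the transform itself. The synthetic sine waveform is only a stand-in for audio you would normally load with torchaudio.load and normalize to [-1, 1]; MuLawEncoding maps such a waveform to integer labels in [0, quantization_channels - 1], and MuLawDecoding maps them back (with quantization error).

import torch
import torchaudio.transforms as transforms

quantization_channels = 256

# Stand-in waveform in [-1, 1]; replace with a real, normalized recording.
waveform = torch.sin(torch.linspace(0, 100, 16000)).unsqueeze(0)

encode = transforms.MuLawEncoding(quantization_channels)
decode = transforms.MuLawDecoding(quantization_channels)

encoded = encode(waveform)   # integer labels in [0, quantization_channels - 1]
decoded = decode(encoded)    # back to floats in [-1, 1]

print(encoded.dtype, int(encoded.min()), int(encoded.max()))
print(float((decoded - waveform).abs().max()))  # small reconstruction error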

github yoyololicon / pytorch_FFTNet / FFTNet_vocoder.py View on Github external
        inputs = inputs[:x[-1].astype(int)]
        targets = targets[:x[-1].astype(int)]
        inputs = inputs[:len(inputs) // seq_M * seq_M]
        targets = targets[:len(targets) // seq_M * seq_M]

        h = f(np.arange(1, len(inputs) + 1))

        train_wav.append(inputs)
        train_features.append(h)
        train_targets.append(targets)

    train_wav = np.concatenate(train_wav)
    train_features = np.vstack(train_features)
    train_targets = np.concatenate(train_targets)

    enc = transforms.MuLawEncoding(channels)
    dec = transforms.MuLawExpanding(channels)  # renamed to MuLawDecoding in newer torchaudio releases

    train_wav = enc(train_wav)
    train_targets = enc(train_targets)

    scaler = StandardScaler()
    train_features = scaler.fit_transform(train_features)

    train_wav = train_wav.reshape(-1, seq_M)
    train_features = np.rollaxis(train_features.reshape(-1, seq_M, features_size), 2, 1)
    train_targets = train_targets.reshape(-1, seq_M)

    train_wav = torch.from_numpy(train_wav).long()
    train_features = torch.from_numpy(train_features).float()
    train_targets = torch.from_numpy(train_targets).long()
    print(train_features.shape, train_wav.shape, train_targets.shape)
github pytorch / audio / test / test_transforms.py View on Github external
def test_batch_mulaw(self):
        waveform, sample_rate = torchaudio.load(self.test_filepath)  # (2, 278756), 44100

        # Single then transform then batch
        waveform_encoded = transforms.MuLawEncoding()(waveform)
        expected = waveform_encoded.unsqueeze(0).repeat(3, 1, 1)

        # Batch then transform
        waveform_batched = waveform.unsqueeze(0).repeat(3, 1, 1)
        computed = transforms.MuLawEncoding()(waveform_batched)

        # shape = (3, 2, 278756)
        self.assertTrue(computed.shape == expected.shape, (computed.shape, expected.shape))
        self.assertTrue(torch.allclose(computed, expected))

        # Single then transform then batch
        waveform_decoded = transforms.MuLawDecoding()(waveform_encoded)
        expected = waveform_decoded.unsqueeze(0).repeat(3, 1, 1)

        # Batch then transform
        computed = transforms.MuLawDecoding()(computed)

        # shape = (3, 2, 278756)
        self.assertTrue(computed.shape == expected.shape, (computed.shape, expected.shape))
        self.assertTrue(torch.allclose(computed, expected))
github pytorch / audio / test / test_transforms.py View on Github external
def test_scriptmodule_MuLawEncoding(self):
        tensor = torch.rand((1, 10))
        _test_script_module(transforms.MuLawEncoding, tensor)
github pytorch / audio / test / test_transforms.py View on Github external
def test_mu_law_companding(self):

        quantization_channels = 256

        waveform = self.waveform.clone()
        waveform /= torch.abs(waveform).max()
        self.assertTrue(waveform.min() >= -1. and waveform.max() <= 1.)

        waveform_mu = transforms.MuLawEncoding(quantization_channels)(waveform)
        self.assertTrue(waveform_mu.min() >= 0. and waveform_mu.max() <= quantization_channels)

        waveform_exp = transforms.MuLawDecoding(quantization_channels)(waveform_mu)
        self.assertTrue(waveform_exp.min() >= -1. and waveform_exp.max() <= 1.)
github pytorch / audio / test / test_jit.py View on Github external
def test_scriptmodule_MuLawEncoding(self):
        tensor = torch.rand((1, 10), device="cuda")

        self._test_script_module(tensor, transforms.MuLawEncoding)
github yoyololicon / pytorch_FFTNet / FFTNet_generator.py View on Github external
    depth = args.depth
    radixs = [2] * depth
    N = np.prod(radixs)
    channels = args.channels
    lr = args.lr
    steps = args.steps
    c = args.c
    generation_time = args.file_size
    filename = args.outfile

    maxlen = 50000
    print('==> Downloading YesNo Dataset..')
    transform = transforms.Compose(
        [transforms.Scale(),
         transforms.PadTrim(maxlen),
         transforms.MuLawEncoding(quantization_channels=channels)])
    data = torchaudio.datasets.YESNO('./data', download=True, transform=transform)
    data_loader = DataLoader(data, batch_size=batch_size, num_workers=4, shuffle=True)

    print('==> Building model..')
    net = general_FFTNet(radixs, 128, channels).cuda()

    print(sum(p.numel() for p in net.parameters() if p.requires_grad), "of parameters.")

    optimizer = optim.Adam(net.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()

    print("Start Training.")
    a = datetime.now().replace(microsecond=0)

    step = 0
    seq_idx = torch.arange(seq_M).view(1, -1)
github yoyololicon / pytorch_FFTNet / utils.py View on Github external
def encoder(quantization_channels):
    return MuLawEncoding(quantization_channels)