How to use the torchaudio.transforms module in torchaudio

To help you get started, we’ve selected a few torchaudio.transforms examples based on popular ways the module is used in public projects.

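Before the project snippets, here is a minimal sketch of the common pattern: each class in torchaudio.transforms is instantiated once with its parameters and then called on a waveform tensor like a function. The file name and parameter values below are placeholders, not taken from any of the projects.

import torch
import torchaudio
import torchaudio.transforms as T

# Load a waveform (any WAV file works; "speech.wav" is a placeholder).
waveform, sample_rate = torchaudio.load("speech.wav")

# Resample to 16 kHz, then compute a log-mel spectrogram.
resample = T.Resample(orig_freq=sample_rate, new_freq=16000)
mel = T.MelSpectrogram(sample_rate=16000, n_fft=400, hop_length=160, n_mels=80)

features = mel(resample(waveform))          # shape: (channels, n_mels, time)
log_features = torch.log(features + 1e-6)   # simple log compression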

github jinserk / pytorch-asr / asr / utils / dataset.py View on Github external
# transformer: convert int to one-hot vector
class Int2OneHot(object):

    def __init__(self, num_labels):
        self.num_labels = num_labels

    def __call__(self, targets):
        one_hots = list()
        for t in targets:
            one_hot = torch.LongTensor(self.num_labels).zero_()
            one_hot[t] = 1
            one_hots.append(one_hot)
        return one_hots


class BatchTransformer(torchaudio.transforms.Compose):

    def __init__(self,
                 resample=True, sample_rate=params.SAMPLE_RATE,
                 tempo=True, tempo_range=params.TEMPO_RANGE,
                 pitch=True, pitch_range=params.PITCH_RANGE,
                 noise=True, noise_range=params.NOISE_RANGE,
                 offset=True, offset_range=None,
                 padding=True, num_padding=None,
                 window_shift=params.WINDOW_SHIFT, window_size=params.WINDOW_SIZE, nfft=params.NFFT,
                 unit_frames=params.WIDTH, stride=2, split=False):
        if offset and offset_range is None:
            offset_range = (0, stride * WIN_SAMP_SHIFT)
        if padding and num_padding is None:
            pad = int(((params.WIDTH * stride) // 2 - 1) * WIN_SAMP_SHIFT)
            num_padding = (pad, pad)
        super().__init__([
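The listing above is cut off mid-call by the source snippet; the key point is that BatchTransformer subclasses torchaudio.transforms.Compose and passes its augmentation steps as a list. Compose only exists in early torchaudio releases. As a rough, hedged sketch (parameter values are illustrative, not taken from the project), the same chaining idea in current torchaudio, where each transform is an nn.Module, looks like this:

import torch
import torchaudio.transforms as T

# In recent torchaudio releases every transform is an nn.Module, so a fixed
# pipeline can be chained with torch.nn.Sequential instead of Compose.
pipeline = torch.nn.Sequential(
    T.Resample(orig_freq=44100, new_freq=16000),
    T.MelSpectrogram(sample_rate=16000, n_fft=512, n_mels=40),
)

waveform = torch.randn(1, 44100)   # one second of dummy audio
features = pipeline(waveform)      # shape: (channels, n_mels, time)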
github pytorch / audio / torchaudio / _docs.py View on Github external
class MelScale:
    forward = torchaudio.transforms.MelScale().forward


class MelSpectrogram:
    forward = torchaudio.transforms.MelSpectrogram().forward


class MFCC:
    forward = torchaudio.transforms.MFCC().forward


class MuLawEncoding:
    forward = torchaudio.transforms.MuLawEncoding().forward


class MuLawDecoding:
    forward = torchaudio.transforms.MuLawDecoding().forward


class Resample:
    # Resample isn't a script_method
    forward = torchaudio.transforms.Resample.forward
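The stubs above appear to exist only so that the forward methods of these transforms show up in the generated documentation. As a quick illustration of two of the listed transforms, a mu-law encode/decode round trip (values below are placeholders) could look like this:

import torch
import torchaudio.transforms as T

encode = T.MuLawEncoding(quantization_channels=256)
decode = T.MuLawDecoding(quantization_channels=256)

waveform = torch.rand(1, 16000) * 2 - 1   # dummy audio scaled to [-1, 1]
quantized = encode(waveform)              # integer classes in [0, 255]
restored = decode(quantized)              # back to an approximate waveform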
github yoyololicon / pytorch_FFTNet / FFTNet_generator.py View on Github external
    seq_M = args.seq_M
    batch_size = args.batch_size
    depth = args.depth
    radixs = [2] * depth
    N = np.prod(radixs)
    channels = args.channels
    lr = args.lr
    steps = args.steps
    c = args.c
    generation_time = args.file_size
    filename = args.outfile

    maxlen = 50000
    print('==> Downloading YesNo Dataset..')
    transform = transforms.Compose(
        [transforms.Scale(),
         transforms.PadTrim(maxlen),
         transforms.MuLawEncoding(quantization_channels=channels)])
    data = torchaudio.datasets.YESNO('./data', download=True, transform=transform)
    data_loader = DataLoader(data, batch_size=batch_size, num_workers=4, shuffle=True)

    print('==> Building model..')
    net = general_FFTNet(radixs, 128, channels).cuda()

    print(sum(p.numel() for p in net.parameters() if p.requires_grad), "of parameters.")

    optimizer = optim.Adam(net.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()

    print("Start Training.")
    a = datetime.now().replace(microsecond=0)
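Scale and PadTrim come from early torchaudio releases and are no longer available; in recent versions torchaudio.load already returns float tensors in [-1, 1], and fixed-length padding is usually done with torch.nn.functional.pad. A rough equivalent of the pipeline above under those assumptions (not the project's own code):

import torch
import torch.nn.functional as F
import torchaudio.transforms as T

maxlen = 50000
channels = 256
mu_law = T.MuLawEncoding(quantization_channels=channels)

def transform(waveform):
    # Pad or trim the waveform to a fixed length, then mu-law quantize it.
    length = waveform.shape[-1]
    if length < maxlen:
        waveform = F.pad(waveform, (0, maxlen - length))
    else:
        waveform = waveform[..., :maxlen]
    return mu_law(waveform)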
github yoyololicon / pytorch_FFTNet / FFTNet_vocoder.py View on Github external
    seq_M = args.seq_M
    batch_size = args.batch_size
    depth = args.depth
    radixs = [2] * depth
    N = np.prod(radixs)
    channels = args.channels
    lr = args.lr
    steps = args.steps
    c = args.c
    generation_time = args.file_size
    filename = args.outfile
    features_size = args.feature_size

    print('==> Downloading YesNo Dataset..')
    transform = transforms.Compose([transforms.Scale()])
    data = torchaudio.datasets.YESNO('./data', download=True, transform=transform)
    data_loader = DataLoader(data, batch_size=1, num_workers=2)

    print('==> Extracting features..')
    train_wav = []
    train_features = []
    train_targets = []
    for batch_idx, (inputs, _) in enumerate(data_loader):
        inputs = inputs.view(-1).numpy()
        targets = np.roll(inputs, shift=-1)

        #h = mfcc(inputs, sr, winlen=winlen, winstep=winstep, numcep=features_size - 1, winfunc=np.hamming)
        x = inputs.astype(float)
        f0, t = pw.dio(x, sr, f0_floor=40, f0_ceil=500, frame_period=winstep * 1000)
        f0 = pw.stonemask(x, f0, t, sr)
        spc = pw.cheaptrick(x, f0, t, sr)
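The commented-out line above hints at MFCC features as an alternative conditioning signal to the WORLD features computed with pyworld. With torchaudio that could be done via torchaudio.transforms.MFCC; the parameters below are illustrative only (YESNO recordings are 8 kHz):

import torch
import torchaudio.transforms as T

sr = 8000  # YESNO sample rate
mfcc = T.MFCC(sample_rate=sr, n_mfcc=25,
              melkwargs={"n_fft": 400, "hop_length": 80, "n_mels": 40})

waveform = torch.randn(1, sr)   # one second of dummy audio
features = mfcc(waveform)       # shape: (channels, n_mfcc, time)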