# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# transformer: convert int to one-hot vector
class Int2OneHot(object):
    """Encode integer class labels as one-hot ``LongTensor`` vectors."""

    def __init__(self, num_labels):
        # Number of classes; fixes the length of every one-hot vector.
        self.num_labels = num_labels

    def __call__(self, targets):
        """Return a list with one one-hot LongTensor per label in *targets*."""
        encoded = []
        for label in targets:
            vec = torch.zeros(self.num_labels, dtype=torch.long)
            vec[label] = 1
            encoded.append(vec)
        return encoded
# Batch-level audio augmentation/featurization pipeline built on a
# Compose-style base class.
# NOTE(review): this definition appears TRUNCATED in this chunk — the list
# passed to super().__init__([...]) never closes in the visible lines.
# NOTE(review): torchaudio.transforms.Compose does not exist in current
# torchaudio releases — confirm the pinned/vendored version provides it.
class BatchTransformer(torchaudio.transforms.Compose):
    def __init__(self,
                 resample=True, sample_rate=params.SAMPLE_RATE,
                 tempo=True, tempo_range=params.TEMPO_RANGE,
                 pitch=True, pitch_range=params.PITCH_RANGE,
                 noise=True, noise_range=params.NOISE_RANGE,
                 offset=True, offset_range=None,
                 padding=True, num_padding=None,
                 window_shift=params.WINDOW_SHIFT, window_size=params.WINDOW_SIZE, nfft=params.NFFT,
                 unit_frames=params.WIDTH, stride=2, split=False):
        # Default random-offset range spans `stride` window shifts.
        # NOTE(review): WIN_SAMP_SHIFT is referenced unqualified here while
        # every other constant comes from `params` — possibly this should be
        # params.WIN_SAMP_SHIFT, or it is a module-level name defined outside
        # this chunk. Verify before running.
        if offset and offset_range is None:
            offset_range = (0, stride * WIN_SAMP_SHIFT)
        # Default symmetric padding sized from the model's frame width and stride.
        if padding and num_padding is None:
            pad = int(((params.WIDTH * stride) // 2 - 1) * WIN_SAMP_SHIFT)
            num_padding = (pad, pad)
        super().__init__([
        # (transform list truncated — remainder of this call is not visible here)
# Thin shim exposing torchaudio's MelScale transform.
# NOTE(review): `forward` is the *bound* method of one shared instance
# created at class-definition time, so all users share its state/config.
class MelScale:
    forward = torchaudio.transforms.MelScale().forward
# Thin shim exposing torchaudio's MelSpectrogram transform.
# NOTE(review): `forward` is the *bound* method of one shared instance
# created at class-definition time, so all users share its state/config.
class MelSpectrogram:
    forward = torchaudio.transforms.MelSpectrogram().forward
# Thin shim exposing torchaudio's MFCC transform.
# NOTE(review): `forward` is the *bound* method of one shared instance
# created at class-definition time, so all users share its state/config.
class MFCC:
    forward = torchaudio.transforms.MFCC().forward
# Thin shim exposing torchaudio's MuLawEncoding transform (default
# quantization settings of the underlying torchaudio class).
class MuLawEncoding:
    forward = torchaudio.transforms.MuLawEncoding().forward
# Thin shim exposing torchaudio's MuLawDecoding transform (default
# quantization settings of the underlying torchaudio class).
class MuLawDecoding:
    forward = torchaudio.transforms.MuLawDecoding().forward
# Thin shim around torchaudio's Resample transform.
# NOTE(review): unlike the sibling shims in this file, this binds the
# *unbound* class method (no instance is created), so `forward` must be
# called with a Resample instance as its first argument.
class Resample:
    # Resample isn't a script_method
    forward = torchaudio.transforms.Resample.forward
# ---- FFTNet training-script setup (this snippet is duplicated verbatim
#      near the end of this file). Reads parsed CLI args from `args`. ----
seq_M = args.seq_M
batch_size = args.batch_size
depth = args.depth
# `depth` radix-2 stages; the product N is the model's receptive field.
radixs = [2] * depth
N = np.prod(radixs)
channels = args.channels  # mu-law quantization channels (= output classes)
lr = args.lr
steps = args.steps
c = args.c
generation_time = args.file_size
filename = args.outfile
# Fixed length (in samples) every waveform is padded/trimmed to.
maxlen = 50000
print('==> Downloading YesNo Dataset..')
# Pipeline: scale, pad/trim to maxlen, mu-law encode to `channels` classes.
# NOTE(review): transforms.Scale / transforms.PadTrim exist only in old
# torchaudio releases — confirm the pinned version.
transform = transforms.Compose(
    [transforms.Scale(),
     transforms.PadTrim(maxlen),
     transforms.MuLawEncoding(quantization_channels=channels)])
data = torchaudio.datasets.YESNO('./data', download=True, transform=transform)
data_loader = DataLoader(data, batch_size=batch_size, num_workers=4, shuffle=True)
print('==> Building model..')
# NOTE(review): 128 is presumably the FFTNet hidden width — defined by
# general_FFTNet outside this chunk; requires a CUDA device.
net = general_FFTNet(radixs, 128, channels).cuda()
print(sum(p.numel() for p in net.parameters() if p.requires_grad), "of parameters.")
optimizer = optim.Adam(net.parameters(), lr=lr)
# Classification over mu-law classes, hence cross-entropy.
criterion = torch.nn.CrossEntropyLoss()
print("Start Training.")
# Wall-clock start time (second resolution) for elapsed-time reporting.
a = datetime.now().replace(microsecond=0)
# ---- Feature-extraction script setup: same CLI hyperparameters as the
#      training snippets in this file, plus WORLD acoustic features. ----
seq_M = args.seq_M
batch_size = args.batch_size
depth = args.depth
# `depth` radix-2 stages; the product N is the model's receptive field.
radixs = [2] * depth
N = np.prod(radixs)
channels = args.channels
lr = args.lr
steps = args.steps
c = args.c
generation_time = args.file_size
filename = args.outfile
features_size = args.feature_size
print('==> Downloading YesNo Dataset..')
transform = transforms.Compose([transforms.Scale()])
data = torchaudio.datasets.YESNO('./data', download=True, transform=transform)
# One utterance at a time (batch_size=1) for per-file feature extraction.
data_loader = DataLoader(data, batch_size=1, num_workers=2)
print('==> Extracting features..')
train_wav = []
train_features = []
train_targets = []
for batch_idx, (inputs, _) in enumerate(data_loader):
    inputs = inputs.view(-1).numpy()
    # Next-sample prediction target: waveform shifted one sample left.
    # NOTE(review): np.roll wraps the final sample around to targets[-1].
    targets = np.roll(inputs, shift=-1)
    #h = mfcc(inputs, sr, winlen=winlen, winstep=winstep, numcep=features_size - 1, winfunc=np.hamming)
    x = inputs.astype(float)
    # WORLD vocoder pipeline: DIO F0 estimate -> StoneMask refinement ->
    # CheapTrick spectral envelope. `pw`, `sr`, `winstep` come from outside
    # this chunk — TODO confirm their definitions.
    f0, t = pw.dio(x, sr, f0_floor=40, f0_ceil=500, frame_period=winstep * 1000)
    f0 = pw.stonemask(x, f0, t, sr)
    spc = pw.cheaptrick(x, f0, t, sr)
    # NOTE(review): loop body appears truncated here — train_wav /
    # train_features / train_targets are never filled in the visible lines.
# ---- FFTNet training-script setup — byte-for-byte DUPLICATE of the first
#      training snippet earlier in this file; consider deduplicating. ----
seq_M = args.seq_M
batch_size = args.batch_size
depth = args.depth
# `depth` radix-2 stages; the product N is the model's receptive field.
radixs = [2] * depth
N = np.prod(radixs)
channels = args.channels  # mu-law quantization channels (= output classes)
lr = args.lr
steps = args.steps
c = args.c
generation_time = args.file_size
filename = args.outfile
# Fixed length (in samples) every waveform is padded/trimmed to.
maxlen = 50000
print('==> Downloading YesNo Dataset..')
# Pipeline: scale, pad/trim to maxlen, mu-law encode to `channels` classes.
transform = transforms.Compose(
    [transforms.Scale(),
     transforms.PadTrim(maxlen),
     transforms.MuLawEncoding(quantization_channels=channels)])
data = torchaudio.datasets.YESNO('./data', download=True, transform=transform)
data_loader = DataLoader(data, batch_size=batch_size, num_workers=4, shuffle=True)
print('==> Building model..')
# NOTE(review): 128 is presumably the FFTNet hidden width — defined by
# general_FFTNet outside this chunk; requires a CUDA device.
net = general_FFTNet(radixs, 128, channels).cuda()
print(sum(p.numel() for p in net.parameters() if p.requires_grad), "of parameters.")
optimizer = optim.Adam(net.parameters(), lr=lr)
# Classification over mu-law classes, hence cross-entropy.
criterion = torch.nn.CrossEntropyLoss()
print("Start Training.")
# Wall-clock start time (second resolution) for elapsed-time reporting.
a = datetime.now().replace(microsecond=0)