Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _create_data_set(self):
    """Generate the ``kaldi_file.wav`` asset used by the tests (offline procedure).

    This is not run as part of testing. It builds a 20-sample synthetic
    signal, scales it to integers, clears the low 16 bits, writes the result
    with ``torchaudio.save`` and reads it back to check that the sample rate
    and the samples round-trip unchanged.
    """
    # The asset is written to ``assets/`` two directory levels above this file.
    test_dirpath = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    test_filepath = os.path.join(test_dirpath, 'assets', 'kaldi_file.wav')
    sr = 16000

    x = torch.arange(0, 20).float()
    # between [-6,6]
    y = torch.cos(2 * math.pi * x) + 3 * torch.sin(math.pi * x) + 2 * torch.cos(x)
    # between [-2^30, 2^30]
    y = (y / 6 * (1 << 30)).long()
    # Shift while the tensor is still integral: bitwise shifts are an integer
    # operation, and shifting the float copy relies on legacy tensor semantics.
    y_high = y >> 16
    # clear the last 16 bits because they aren't used anyways
    y = (y_high << 16).float()

    torchaudio.save(test_filepath, y, sr)
    sound, sample_rate = torchaudio.load(test_filepath, normalization=False)

    # Show the upper bits of every sample (same values the float shift produced).
    print(y_high)
    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(sample_rate, sr)
    self.assertTrue(torch.allclose(y, sound))
def test4_wavenet_audio(self):
try:
import matplotlib.pyplot as plt
except ImportError:
print("install matplotlib for plot of signals")
plt = None
num_samples = 1 << 15
sig, sr = torchaudio.load("test/data/david.wav")
sig = sig[:-(sig.size(0)%3):3]
input = sig[16000:(16000+num_samples)].contiguous()
# write sample for qualitative test
torchaudio.save("test/data/david_16000hz_input_sample.wav", input, sr//3)
input /= torch.abs(input).max()
assert input.min() >= -1. and input.max() <= 1.
input = input.view(1, 1, -1)
labels = input.numpy()
labels = mu_law_encoding(labels, 256)
labels = torch.from_numpy(labels).squeeze().long()
# build network and optimizer
m = FastWaveNet(layers=10,
blocks=4, # number of blocks
residual_channels=16,
dilation_channels=32,
skip_channels=16,
quantization_channels=256,
input_len=num_samples,
audio_channels=1,
# import numpy as np
# print(out.detach().numpy() < 0)
#TODO: Upsampling?
# Check out path
out_path = os.path.expanduser(args.eval_out_path)
if not os.path.exists(out_path):
os.mkdir(out_path)
out_filename = '{}_{}.wav'.format(os.path.basename(audio_path).split('.')[0], args.eval_speaker_id)
out_file_path = os.path.join(out_path, out_filename)
# Save as audio
torchaudio.save(filepath=out_file_path, src=out, sample_rate=sr)
step += 1
if step > steps:
break
"""
print("Training time cost:", datetime.now().replace(microsecond=0) - a)
print("Start to generate some noise...")
net = net.cpu()
net.eval()
with torch.no_grad():
a = datetime.now().replace(microsecond=0)
generation = net.fast_generate(int(sr * generation_time), c=c)
decoder = transforms.MuLawExpanding(channels)
generation = decoder(generation)
torchaudio.save(filename, generation, sr)
print("Generation time cost:", datetime.now().replace(microsecond=0) - a)
r"""Saves a Tensor with audio signal to disk as a standard format like mp3, wav, etc.
The default options have changed as of torchaudio 0.2 and this function maintains
option defaults from version 0.1.
Args:
filepath (str): Path to audio file
src (torch.Tensor): An input 2D Tensor of shape `[L x C]` where L is
the number of audio frames, C is the number of channels
sample_rate (int): The sample-rate of the audio to be saved
precision (int, optional): The bit-precision of the audio to be saved. (Default: ``32``)
Example
>>> data, sample_rate = torchaudio.legacy.load('foo.mp3')
>>> torchaudio.legacy.save('foo.wav', data, sample_rate)
"""
torchaudio.save(filepath, src, sample_rate, precision, False)
print(step, "{:.4f}".format(loss.item()))
step += 1
if step > steps:
break
print("Training time cost:", datetime.now().replace(microsecond=0) - a)
print("Start to generate some noise...")
net = net.cpu()
net.eval()
with torch.no_grad():
a = datetime.now().replace(microsecond=0)
generation = net.fast_generate(h=test_features, c=c)
generation = dec(generation)
torchaudio.save(filename, generation, sr)
cost = datetime.now().replace(microsecond=0) - a
print("Generation time cost:", cost, ". Speed:", generation.size(0)/cost.total_seconds(), "samples/sec.")
def write_wav(fname, src, sample_rate):
    """Write audio frames to a wav file.

    Thin wrapper that forwards its arguments to ``torchaudio.save``.

    Args:
        fname: wav file path.
        src: frames of audio.
        sample_rate: An integer which is the sample rate of the audio.

    Returns:
        None
    """
    torchaudio.save(fname, src=src, sample_rate=sample_rate)