Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _build_mel_basis(hparams):
    """Build the mel filterbank described by ``hparams``.

    Args:
        hparams: Object exposing the attributes ``sample_rate``, ``n_fft``,
            ``num_mels``, ``fmin`` and ``fmax``.

    Returns:
        The value returned by ``librosa.filters.mel`` for those settings.

    Raises:
        AssertionError: If ``hparams.fmax`` is greater than
            ``hparams.sample_rate // 2``.
    """
    assert hparams.fmax <= hparams.sample_rate // 2, (
        "hparams.fmax must not exceed hparams.sample_rate // 2"
    )
    # sr and n_fft are passed by keyword: librosa >= 0.10 made them
    # keyword-only, and older releases accept the keyword form as well.
    return librosa.filters.mel(
        sr=hparams.sample_rate,
        n_fft=hparams.n_fft,
        n_mels=hparams.num_mels,
        fmin=hparams.fmin,
        fmax=hparams.fmax,
    )
y, _ = librosa.effects.trim(y)
# Preemphasis
y = np.append(y[0], y[1:] - hp.preemphasis * y[:-1])
# stft
linear = librosa.stft(y=y,
n_fft=hp.n_fft,
hop_length=hp.hop_length,
win_length=hp.win_length)
# magnitude spectrogram
mag = np.abs(linear) # (1+n_fft//2, T)
# mel spectrogram
mel_basis = librosa.filters.mel(hp.sr, hp.n_fft, hp.n_mels) # (n_mels, 1+n_fft//2)
mel = np.dot(mel_basis, mag ** 2) # (n_mels, t)
# Transpose
mel = mel.T.astype(np.float32) # (T, n_mels)
mag = mag.T.astype(np.float32) # (T, 1+n_fft//2)
# Sequence length
dones = np.ones_like(mel[:, 0])
# Padding
mel = np.pad(mel, ((0, hp.T_y - len(mel)), (0, 0)), mode="constant")[:hp.T_y]
mag = np.pad(mag, ((0, hp.T_y - len(mag)), (0, 0)), mode="constant")[:hp.T_y]
dones = np.pad(dones, ((0, hp.T_y - len(dones))), mode="constant")[:hp.T_y]
# Log
mel = np.log10(mel + 1e-8)
>>> librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max, top_db=None),
... y_axis='log', x_axis='time')
>>> plt.colorbar()
>>> plt.title('Original STFT')
>>> plt.subplot(2,1,2)
>>> librosa.display.specshow(librosa.amplitude_to_db(np.abs(S_inv - S),
... ref=S.max(), top_db=None),
... vmax=0, y_axis='log', x_axis='time', cmap='magma')
>>> plt.title('Residual error (dB)')
>>> plt.colorbar()
>>> plt.tight_layout()
>>> plt.show()
'''
# Construct a mel basis with dtype matching the input data
mel_basis = filters.mel(sr, n_fft, n_mels=M.shape[0],
dtype=M.dtype,
**kwargs)
# Find the non-negative least squares solution, and apply
# the inverse exponent.
# We'll do the exponentiation in-place.
inverse = nnls(mel_basis, M)
return np.power(inverse, 1./power, out=inverse)
def build_mel_basis():
    """Build the mel filterbank from the module-level ``hp`` settings.

    Reads ``hp.sample_rate``, ``hp.n_fft``, ``hp.num_mels`` and ``hp.fmin``.

    Returns:
        The value returned by ``librosa.filters.mel`` for those settings.
    """
    # sr and n_fft are passed by keyword: librosa >= 0.10 made them
    # keyword-only, and older releases accept the keyword form as well.
    return librosa.filters.mel(
        sr=hp.sample_rate,
        n_fft=hp.n_fft,
        n_mels=hp.num_mels,
        fmin=hp.fmin,
    )
def freq2mel(f_data, sr, fft_size, n_mel, fmax=8000):
    """Project frequency-domain data onto a column-normalised mel filterbank.

    Args:
        f_data: Array whose last axis can be dotted with the
            ``(frequency bins, n_mel)`` weight matrix built here.
        sr: Sample rate forwarded to ``librosa.filters.mel``.
        fft_size: FFT size forwarded to ``librosa.filters.mel`` as ``n_fft``.
        n_mel: Number of mel bands.
        fmax: Upper frequency bound of the filterbank (default 8000).

    Returns:
        ``np.dot(f_data, weights)``, where each column of ``weights`` is one
        mel filter scaled so that its entries sum to 1.
    """
    # sr and n_fft are passed by keyword: librosa >= 0.10 made them
    # keyword-only, and older releases accept the keyword form as well.
    filterbank = librosa.filters.mel(
        sr=sr, n_fft=fft_size, n_mels=n_mel, fmax=fmax
    )
    weights = filterbank.T
    column_sums = np.sum(weights, axis=0)
    # An empty mel filter sums to zero; dividing by it would put NaN in the
    # whole output column. Leave such columns at zero instead.
    normalised = np.divide(
        weights,
        column_sums,
        out=np.zeros_like(weights),
        where=column_sums != 0,
    )
    return np.dot(f_data, normalised)
Returns:
mfcc - coefficients
mag - magnitude spectrum
mel
'''
# Pre-emphasis
y_preem = preemphasis(wav, coeff=preemphasis_coeff)
# Get spectrogram
D = librosa.stft(y=y_preem, n_fft=n_fft,
hop_length=hop_length, win_length=win_length)
mag = np.abs(D)
# Get mel-spectrogram
mel_basis = librosa.filters.mel(
hp.Default.sr, hp.Default.n_fft, hp.Default.n_mels) # (n_mels, 1+n_fft//2)
mel = np.dot(mel_basis, mag) # (n_mels, t) # mel spectrogram
# Get mfccs
db = librosa.amplitude_to_db(mel)
mfccs = np.dot(librosa.filters.dct(hp.Default.n_mfcc, db.shape[0]), db)
# Log
mag = np.log(mag + sys.float_info.epsilon)
mel = np.log(mel + sys.float_info.epsilon)
# Normalization
# self.y_log_spec = (y_log_spec - hp.mean_log_spec) / hp.std_log_spec
# self.y_log_spec = (y_log_spec - hp.min_log_spec) / (hp.max_log_spec - hp.min_log_spec)
return mfccs.T, mag.T, mel.T # (t, n_mfccs), (t, 1+n_fft/2), (t, n_mels)
y, _ = librosa.effects.trim(y)
# Preemphasis
y = np.append(y[0], y[1:] - hp.preemphasis * y[:-1])
# stft
linear = librosa.stft(y=y,
n_fft=hp.n_fft,
hop_length=hp.hop_length,
win_length=hp.win_length)
# magnitude spectrogram
mag = np.abs(linear) # (1+n_fft//2, T)
# mel spectrogram
mel_basis = librosa.filters.mel(hp.sr, hp.n_fft, hp.n_mels) # (n_mels, 1+n_fft//2)
mel = np.dot(mel_basis, mag) # (n_mels, t)
# to decibel
mel = 20 * np.log10(np.maximum(1e-5, mel))
mag = 20 * np.log10(np.maximum(1e-5, mag))
# normalize
mel = np.clip((mel - hp.ref_db + hp.max_db) / hp.max_db, 1e-8, 1)
mag = np.clip((mag - hp.ref_db + hp.max_db) / hp.max_db, 1e-8, 1)
# Transpose
mel = mel.T.astype(np.float32) # (T, n_mels)
mag = mag.T.astype(np.float32) # (T, 1+n_fft//2)
return mel, mag
>>> S_dB = librosa.power_to_db(S, ref=np.max)
>>> librosa.display.specshow(S_dB, x_axis='time',
... y_axis='mel', sr=sr,
... fmax=8000)
>>> plt.colorbar(format='%+2.0f dB')
>>> plt.title('Mel-frequency spectrogram')
>>> plt.tight_layout()
>>> plt.show()
"""
S, n_fft = _spectrogram(y=y, S=S, n_fft=n_fft, hop_length=hop_length, power=power,
win_length=win_length, window=window, center=center,
pad_mode=pad_mode)
# Build a Mel filter
mel_basis = filters.mel(sr, n_fft, **kwargs)
return np.dot(mel_basis, S)
Returns:
mfcc - coefficients
mag - magnitude spectrum
mel
'''
# Pre-emphasis
y_preem = preemphasis(wav, coeff=preemphasis_coeff)
# Get spectrogram
D = librosa.stft(y=y_preem, n_fft=n_fft,
hop_length=hop_length, win_length=win_length)
mag = np.abs(D)
# Get mel-spectrogram
mel_basis = librosa.filters.mel(
hp.Default.sr, hp.Default.n_fft, hp.Default.n_mels) # (n_mels, 1+n_fft//2)
mel = np.dot(mel_basis, mag) # (n_mels, t) # mel spectrogram
# Get mfccs
db = librosa.amplitude_to_db(mel)
mfccs = np.dot(librosa.filters.dct(hp.Default.n_mfcc, db.shape[0]), db)
# Log
mag = np.log(mag + sys.float_info.epsilon)
mel = np.log(mel + sys.float_info.epsilon)
# Normalization
# self.y_log_spec = (y_log_spec - hp.mean_log_spec) / hp.std_log_spec
# self.y_log_spec = (y_log_spec - hp.min_log_spec) / (hp.max_log_spec - hp.min_log_spec)
return mfccs.T, mag.T, mel.T # (t, n_mfccs), (t, 1+n_fft/2), (t, n_mels)