Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _test_get_strided_helper(self, num_samples, window_size, window_shift, snip_edges):
    """Check ``kaldi._get_strided`` against a frame-by-frame reference.

    Builds the ramp waveform ``0, 1, ..., num_samples - 1``, frames it with
    ``kaldi._get_strided`` and compares the result with a buffer filled by
    calling ``extract_window`` once per expected frame.

    Args:
        num_samples: Length of the generated waveform.
        window_size: Expected number of columns of the strided output.
        window_shift: Shift used in the expected-frame-count formulas.
        snip_edges: Selects which frame-count formula applies; also forwarded
            to ``kaldi._get_strided`` and ``extract_window``.
    """
    waveform = torch.arange(num_samples).float()
    output = kaldi._get_strided(waveform, window_size, window_shift, snip_edges)

    # Expected frame count, from NumFrames in feature-window.cc
    if snip_edges:
        if num_samples < window_size:
            expected_frames = 0
        else:
            expected_frames = 1 + (num_samples - window_size) // window_shift
    else:
        expected_frames = (num_samples + (window_shift // 2)) // window_shift

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(output.dim(), 2)
    self.assertEqual(tuple(output.shape), (expected_frames, window_size))

    # Reference buffer; extract_window is called once per frame index
    # (presumably it fills row `row` of `expected` in place -- TODO confirm).
    expected = torch.empty((expected_frames, window_size))
    for row in range(expected_frames):
        extract_window(expected, waveform, row, window_size, window_shift, snip_edges)
    self.assertTrue(torch.allclose(expected, output))
def generate_rand_window_type():
    """Return a randomly chosen entry of ``torchaudio.compliance.kaldi.WINDOWS``.

    The choice is drawn from the module-level ``random`` generator.
    """
    return random.choice(torchaudio.compliance.kaldi.WINDOWS)
import os
import torch
import random
import kaldiio as kio
import numpy as np
import torchaudio as ta
from torch.utils.data import Dataset, DataLoader
from prefetch_generator import BackgroundGenerator
# Integer ids for special symbols (presumably indices into the vocabulary
# returned by load_vocab -- TODO confirm against the vocab file).
PAD = 0
# EOS and BOS share the same id (1).
EOS = 1
BOS = 1
# UNK and MASK share the same id (2).
UNK = 2
MASK = 2
# NOTE(review): this is an empty string as written; it may originally have
# been a bracketed token that was lost in extraction -- confirm.
unk = ''
# Short alias for torchaudio's Kaldi-compliance fbank function.
compute_fbank = ta.compliance.kaldi.fbank
def load_vocab(vocab_file):
    """Read a unit-to-index vocabulary from a whitespace-separated text file.

    Every non-blank line must hold exactly two fields: the unit followed by
    its integer index. Blank or whitespace-only lines are skipped.

    Args:
        vocab_file: Path of the UTF-8 encoded vocabulary file.

    Returns:
        dict: Maps each unit string to its ``int`` index. When a unit appears
        on several lines, the last occurrence wins.

    Raises:
        ValueError: If a non-blank line does not have exactly two fields, or
            its second field is not an integer.
    """
    unit2idx = {}
    with open(vocab_file, 'r', encoding='utf-8') as v:
        for line in v:
            fields = line.split()
            if not fields:
                # A blank line (e.g. a trailing newline at end of file)
                # carries no entry and must not crash the unpack below.
                continue
            unit, idx = fields
            unit2idx[unit] = int(idx)
    return unit2idx
def normalization(feature):
    """Standardize ``feature`` using the mean and standard deviation over dim 0.

    Args:
        feature: Floating-point tensor; statistics are reduced over dim 0
            (presumably the frame/time axis -- TODO confirm with callers).

    Returns:
        Tensor with the mean over dim 0 subtracted and the result divided by
        the standard deviation over dim 0. Positions whose standard deviation
        is exactly zero are divided by 1 instead, so a constant column
        normalizes to 0 rather than NaN.
    """
    std, mean = torch.std_mean(feature, dim=0)
    # A constant column has std == 0, and 0 / 0 would yield NaN; substitute 1
    # there. Every non-zero std is left untouched, so other results are unchanged.
    safe_std = torch.where(std == 0, torch.ones_like(std), std)
    return (feature - mean) / safe_std
</s>
def speech_to_mfcc(self, speech_in):
    """
    Extracts Mel Frequency Cepstral Coefficients (MFCC) from input utterance.

    The number of coefficients is not set here, so it is whatever
    ``torchaudio.compliance.kaldi.mfcc`` uses by default (13 according to the
    torchaudio documentation).

    Args:
        speech_in (tuple(np.array, int)): The utterance, represented as array,
            and the sampling rate

    Returns:
        dict: ``{'mfcc': features}``, where ``features`` is the value returned
        by ``torchaudio.compliance.kaldi.mfcc`` (not a NumPy array).
    """
    samples, sample_rate = speech_in[0], speech_in[1]
    # unsqueeze(0) adds a leading dimension before the call
    # (assumes `samples` is 1-D, giving (1, time) -- TODO confirm).
    waveform = torch.from_numpy(samples).unsqueeze(0)
    mfcc = torchaudio.compliance.kaldi.mfcc(
        waveform,
        sample_frequency=sample_rate
    )
    return {'mfcc': mfcc}