Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
:return:
"""
start_time = get_time()
print("Extract {} feature...".format(feature_type))
feature = []
train_data = {}
for i in tqdm(range(len(x))):
# extract mfcc feature based on psf, you can look more detail on psf's website.
if feature_type=='MFCC':
_feature = mfcc(x[i])
mfcc_delta = delta(_feature)
_feature = np.hstack((_feature, mfcc_delta))
_feature = preprocessing.scale(_feature)
elif feature_type=='PLP':
_feature = plp(x[i])
mfcc_delta = delta(_feature)
_feature = np.hstack((_feature, mfcc_delta))
_feature = preprocessing.scale(_feature)
else:
raise NameError
# append _feature to feature
feature.append(_feature)
if is_train:
if y[i] in train_data:
train_data[y[i]] = np.vstack((train_data[y[i]], _feature))
else:
train_data[y[i]] = _feature
def record(self):
    """Capture a short clip and cache its scaled feature matrix on the instance.

    Progress messages are appended to ``self.textBrowser`` before and after
    the call to ``record(seconds=3)``. The audio is then read back from
    ``test.wav`` (presumably written by that call -- confirm against the
    recorder helper), converted with ``mfcc`` when ``self.feature_type`` is
    ``'MFCC'`` and with ``plp`` for any other value, extended with its delta
    columns, scaled with ``preprocessing.scale`` and stored in
    ``self.feature``. ``test.wav`` is removed as the last step.
    """
    self.textBrowser.append('Start the recording !')
    # NOTE(review): assuming this is a method, the bare name resolves to a
    # module-level ``record`` helper rather than to this method -- confirm.
    record(seconds=3)
    self.textBrowser.append('3 seconds record has completed.')

    # Only the second element of the pair returned by ``read`` is used.
    _unused, samples = read(filename='test.wav')

    # Any feature type other than 'MFCC' falls through to PLP.
    extractor = mfcc if self.feature_type == 'MFCC' else plp
    # Element 0 of the extractor's result is what gets used as the features.
    static = extractor(samples)[0]

    # Append the delta columns to the right of the static ones, then scale.
    stacked = np.hstack((static, delta(static)))
    self.feature = preprocessing.scale(stacked)

    os.remove('test.wav')
# Excerpt from a feature-extraction routine. Its `def` line and the setup of
# `x`, `y`, `feature`, `label`, `datatype` and `feature_type` are not part of
# this excerpt.
#
# Step 1: cut every signal in `x` into consecutive, non-overlapping pieces of
# `self.sample_rate` elements along axis 0. Trailing elements that do not fill
# a whole piece are dropped; each piece is paired with its source's label.
new_x = []
new_y = []
for i in range(len(x)):
for j in range(x[i].shape[0]//self.sample_rate):
new_x.append(x[i][j*self.sample_rate:(j+1)*self.sample_rate])
new_y.append(y[i])
# From here on `x` / `y` refer to the pieces, not the original signals.
x = new_x
y = new_y
# Step 2: extract one feature matrix per piece.
for i in tqdm(range(len(x))):
# MFCC and PLP default to 16000 Hz here; remember to change it.
# mfcc: 25 ms window length, 10 ms overlap.
if feature_type == 'MFCC':
_feature = mfcc(x[i], fs=self.sample_rate)[0]
elif feature_type == 'PLP':
_feature = plp(x[i], fs=self.sample_rate)[0]
else:
# Any other feature type is rejected.
raise NameError
# The features were problematic: some contained infinite values, which made
# the whole network's gradients explode, so special handling is required.
# NOTE(review): the check below only detects NaN, not +/-inf -- confirm
# whether infinities are handled elsewhere.
if np.isnan(_feature).sum()>0:
continue
# _feature = np.concatenate([_feature,self.delta(_feature)],axis=1)
# _feature = preprocessing.scale(_feature)
# _feature = preprocessing.StandardScaler().fit_transform(_feature)
# Every 2*num frames form one input, with an overlap of num.
# NOTE(review): no `num`-based windowing appears in this excerpt; the comment
# above looks left over from another version -- confirm.
feature.append(_feature)
label.append(y[i])
# Report how many feature matrices were kept and the shape of the first one.
print(len(feature), feature[0].shape)
# Persist features and labels under names keyed by datatype and feature type.
self.save(feature, '{}_{}_feature'.format(datatype, feature_type))
self.save(label, '{}_{}_label'.format(datatype, feature_type))
start_time = get_time()
if not os.path.exists('feature'):
os.mkdir('feature')
if not os.path.exists('feature/{}_feature.pkl'.format(feature_type)):
x, y = self.load_data()
print("Extract {} feature...".format(feature_type))
feature = []
label = []
for i in tqdm(range(len(x))):
# 这里MFCC和PLP默认是16000Hz,注意修改
# mfcc 25ms窗长,10ms重叠
if feature_type == 'MFCC':
_feature = mfcc(x[i])[0]
elif feature_type == 'PLP':
_feature = plp(x[i])[0]
else:
raise NameError
_feature = np.concatenate([_feature,self.delta(_feature)],axis=1)
# TODO 兼容i-vector 和 d-vector
_feature = preprocessing.scale(_feature)
num = 10
for j in range(_feature.shape[0]//num-1):
feature.append(_feature[j*num:j*num+2*num])
label.append(y[i])
print(len(feature), feature[0].shape)
self.save(feature, '{}_feature'.format(feature_type))
self.save(label, '{}_label'.format(feature_type))
else:
feature = self.load('{}_feature'.format(feature_type))