encoded_e1, encoded_len_e1 = jasper_encoder(
    audio_signal=processed_signal_e1,  # assumed: eval-side features, mirroring the training graph below
    length=p_length_e1)
log_probs_e1 = jasper_decoder(encoder_output=encoded_e1)
predictions_e1 = greedy_decoder(log_probs=log_probs_e1)
loss_e1 = ctc_loss(log_probs=log_probs_e1,
                   targets=transcript_e1,
                   input_length=encoded_len_e1,
                   target_length=transcript_len_e1)
print('\n\n\n================================')
print("Total number of parameters: {0}".format(
    jasper_decoder.num_weights + jasper_encoder.num_weights))
print('================================')
# Callbacks needed to print info to console and Tensorboard
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensor_list2str=lambda x: str(x[0].item()),
    tb_writer=tb_writer,
    tensor_list2str_evl=lambda x: monitor_asr_train_progress(x, labels=labels))
eval_callback1 = nemo.core.EvaluatorCallback(
    eval_tensors=[loss_e1, predictions_e1, transcript_e1, transcript_len_e1],
    user_iter_callback=lambda x, y: process_evaluation_batch(
        x, y, labels=labels),
    user_epochs_done_callback=lambda x: process_evaluation_epoch(
        x, tag="DEV-CLEAN"),
    eval_step=500,
    tb_writer=tb_writer)
def lr_policy(initial_lr, step, N):
    # Squared polynomial decay from initial_lr down to ~0 over N steps
    res = initial_lr * ((N - step + 1) / N) ** 2
    return res
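# A quick sanity check of the decay curve (illustrative values, not from the
# original script): with initial_lr=1e-2 and N=1000, the rate falls
# quadratically from 1e-2 at step 1 to 1e-8 at step 1000.
for demo_step in (1, 250, 500, 1000):
    print(demo_step, lr_policy(initial_lr=1e-2, step=demo_step, N=1000))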
train_loss = loss(log_probs=log_probs, target_ids=labels)
# evaluation pipeline
src_, src_mask_, labels_ = eval_data_layer()
src_hiddens_ = encoder(input_ids=src_, input_mask_src=src_mask_)
log_probs_ = log_softmax(hidden_states=src_hiddens_)
eval_loss = loss(log_probs=log_probs_, target_ids=labels_)
def print_loss(x):
    loss = str(x[0].item())
    print(f"Training loss: {loss}")

# callback which prints training loss once in a while
callback_train = nemo.core.SimpleLossLoggerCallback(
    tensors=[train_loss],
    step_freq=100,
    print_func=print_loss,
    get_tb_values=lambda x: [["loss", x[0]]],
    tb_writer=tb_writer)
# callback which calculates evaluation loss
callback_eval = nemo.core.EvaluatorCallback(
    eval_tensors=[eval_loss],
    user_iter_callback=eval_iter_callback,
    user_epochs_done_callback=eval_epochs_done_callback,
    eval_step=args.eval_freq,
    tb_writer=tb_writer)
# callback which saves checkpoints once in a while
callback_ckpt = nemo.core.CheckpointCallback(
    folder=args.checkpoint_dir,           # illustrative: the original arguments are truncated here
    step_freq=args.checkpoint_save_freq)
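# A minimal sketch of wiring the three callbacks into training, assuming the
# NeMo 0.x nf.train API used elsewhere in these examples; the optimizer
# settings are placeholders.
nf.train(
    tensors_to_optimize=[train_loss],
    callbacks=[callback_train, callback_eval, callback_ckpt],
    optimizer="adam",
    optimization_params={"num_epochs": args.num_epochs, "lr": args.lr})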
input_ids, input_type_ids, eval_input_mask, eval_labels, eval_seq_ids = \
    eval_data_layer()
hidden_states = bert_model(
    input_ids=input_ids,
    token_type_ids=input_type_ids,
    attention_mask=eval_input_mask)
eval_loss, eval_logits = ner_loss(
    hidden_states=hidden_states,
    labels=eval_labels,
    input_mask=eval_input_mask)
# Create the logging callback and the trainer, then execute the training action
callback_train = nemo.core.SimpleLossLoggerCallback(
    tensor_list2str=lambda x: "{:.3f}".format(x[0].item()),
    tb_writer=tb_writer,
    step_freq=100)
# Instantiate an optimizer to perform `train` action
optimizer = neural_factory.get_trainer(
    params={
        "optimizer_kind": args.optimizer_kind,
        "optimization_params": {
            "num_epochs": args.num_epochs,
            "lr": args.lr,
            "weight_decay": args.weight_decay,
            "amsgrad": True
        }})
train_data_size = len(train_data_layer)
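# train_data_size is typically used to derive the schedule; a sketch assuming
# a batch_size argument (not shown in this snippet):
import math
steps_per_epoch = math.ceil(train_data_size / args.batch_size)
total_steps = steps_per_epoch * args.num_epochs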
(eval_src_ids, eval_src_lens, eval_tgt_ids,
 eval_tgt_lens, eval_gate_labels, eval_turn_domain) = eval_data_layer()
outputs, hidden = encoder(inputs=eval_src_ids, input_lens=eval_src_lens)
eval_point_outputs, eval_gate_outputs = decoder(encoder_hidden=hidden,
                                                encoder_outputs=outputs,
                                                input_lens=eval_src_lens,
                                                src_ids=eval_src_ids,
                                                targets=eval_tgt_ids)
eval_loss = ptr_loss_fn(logits=eval_point_outputs,
                        targets=eval_tgt_ids,
                        mask=eval_tgt_lens)
eval_tensors = [eval_loss, eval_point_outputs, eval_gate_outputs,
                eval_gate_labels, eval_turn_domain]
# Create callbacks for train and eval modes
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[train_loss],
    print_func=lambda x: print('Loss:', str(np.round(x[0].item(), 3))),
    tb_writer=nf.tb_writer,
    get_tb_values=lambda x: [["loss", x[0]]],
    step_freq=steps_per_epoch)
eval_callback = nemo.core.EvaluatorCallback(
    eval_tensors=eval_tensors,
    user_iter_callback=lambda x, y: eval_iter_callback(
        x, y, data_layer),
    user_epochs_done_callback=lambda x: eval_epochs_done_callback(
        x, f'{nf.work_dir}/graphs'),
    tb_writer=nf.tb_writer,
    eval_step=steps_per_epoch)
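# A minimal sketch of launching training with both callbacks; nf.train mirrors
# the call shown later in these examples, and the optimizer settings are
# placeholders.
nf.train(
    tensors_to_optimize=[train_loss],
    callbacks=[train_callback, eval_callback],
    optimizer="adam",
    optimization_params={"num_epochs": args.num_epochs, "lr": args.lr})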
logger.info('Config:')
logger.info(pformat(cfg))
num_data = cfg['input']['train']['num_data']
steps_per_epoch = cfg['optimization']['steps_per_epoch']
total_steps = cfg['optimization']['total_steps']
logger.info(f'Num data: {num_data}\n'
            f'Steps per epoch: {steps_per_epoch}\n'
            f'Total steps: {total_steps}')
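# Illustrative shape of the cfg dict consumed above; the keys follow the
# lookups, the values are placeholders.
example_cfg = {
    'input': {'train': {'num_data': 28539}},
    'optimization': {'steps_per_epoch': 223, 'total_steps': 22300},
}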
# TODO: Workaround?
dag_callbacks[0].tb_writer = tb_writer
dag_callbacks[1].tb_writer = tb_writer
# Callbacks
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[train_loss],
    print_func=lambda x: logger.info(f"Loss: {x[0].item()}"),
    get_tb_values=lambda x: [("loss", x[0])],
    tb_writer=tb_writer
)
log_callbacks = [train_callback]
target = cfg['target']
labels = target['labels']
specials = {f'{ss.name}_id': target[f'{ss.name}_id'] for ss in sss}
for name, tensors in evals:
    eval_callback = nemo.core.EvaluatorCallback(
        # TODO: Should be fixed soon, so we don't need to pass exactly list
        eval_tensors=list(tensors),
        user_iter_callback=partial(
            process_evaluation_batch,
            labels=labels))
# Tacotron 2 training graph (the opening of this snippet is truncated; the
# decoder call below is reconstructed from the names it feeds)
mel_decoder, gate, alignments = t2_decoder(
    char_phone_encoded=encoded,  # assumed name for the text-encoder output
    encoded_length=transcript_len,
    mel_target=spec_target)
mel_postnet = t2_postnet(mel_input=mel_decoder)
gate_target = makegatetarget(
    mel_target=spec_target, target_len=spec_target_len)
loss_t = t2_loss(
    mel_out=mel_decoder,
    mel_out_postnet=mel_postnet,
    gate_out=gate,
    mel_target=spec_target,
    gate_target=gate_target,
    target_len=spec_target_len,
    seq_len=audio_len)
# Callbacks needed to print info to console and Tensorboard
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss_t, spec_target, mel_postnet, gate, gate_target,
             alignments],
    print_func=lambda x: print(f"Loss: {x[0].data}"),
    log_to_tb_func=partial(
        tacotron2_log_to_tb_func, log_images=True,
        log_images_freq=log_freq),
    tb_writer=neural_factory.tb_writer,
)
chpt_callback = nemo.core.CheckpointCallback(
    folder=neural_factory.checkpoint_dir,
    step_freq=checkpoint_save_freq)
callbacks = [train_callback, chpt_callback]
return loss_t, callbacks, steps_per_epoch
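# A minimal sketch of consuming the returned triple; the builder name and
# optimization settings here are assumptions.
loss_t, callbacks, steps_per_epoch = create_train_dag()  # hypothetical builder
neural_factory.train(
    tensors_to_optimize=[loss_t],
    callbacks=callbacks,
    optimizer="adam",
    optimization_params={"num_epochs": 100, "lr": 1e-3})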
def outputs2words(tensors, vocab):
    source_ids = tensors[1][:, 0].cpu().numpy().tolist()
    response_ids = tensors[2][:, 0].cpu().numpy().tolist()
    tgt_ids = tensors[3][:, 0].cpu().numpy().tolist()
    source = list(map(lambda x: vocab[x], source_ids))
    response = list(map(lambda x: vocab[x], response_ids))
    target = list(map(lambda x: vocab[x], tgt_ids))
    source = ' '.join([s for s in source if s != 'EOS' and s != 'PAD'])
    response = ' '.join([s for s in response if s != 'EOS' and s != 'PAD'])
    target = ' '.join([s for s in target if s != 'EOS' and s != 'PAD'])
    print(f"Train Loss: {str(tensors[0].item())}")
    print(f"SOURCE: {source} <---> PREDICTED RESPONSE: {response} "
          f"<---> TARGET: {target}")
callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss, src, outputs_inf, tgt],
    print_func=lambda x: outputs2words(x, dl.voc.index2word)
)
# start training
nf.train(
    tensors_to_optimize=[loss],
    callbacks=[callback],
    optimizer="adam",
    optimization_params={"num_epochs": config["num_epochs"], "lr": 0.001})
processed_signal_t = data_spectr_augmentation(
    input_spec=processed_signal_t)
encoded_t, encoded_len_t = jasper_encoder(
    audio_signal=processed_signal_t,
    length=p_length_t)
log_probs_t = jasper_decoder(encoder_output=encoded_t)
predictions_t = greedy_decoder(log_probs=log_probs_t)
loss_t = ctc_loss(
    log_probs=log_probs_t,
    targets=transcript_t,
    input_length=encoded_len_t,
    target_length=transcript_len_t)
# Callbacks needed to print info to console and Tensorboard
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss_t, predictions_t, transcript_t, transcript_len_t],
    print_func=partial(
        monitor_asr_train_progress,
        labels=vocab,
        eval_metric='CER',
        logger=logger),
    step_freq=args.train_eval_freq,
    get_tb_values=lambda x: [("loss", x[0])],
    tb_writer=neural_factory.tb_writer,
)
chpt_callback = nemo.core.CheckpointCallback(
    folder=neural_factory.checkpoint_dir,
    step_freq=args.checkpoint_save_freq)
callbacks = [train_callback, chpt_callback]
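# A minimal sketch of kicking off training with these callbacks, mirroring the
# nf.train call in the chatbot snippet above; the optimizer settings are
# placeholders, not the original script's arguments.
neural_factory.train(
    tensors_to_optimize=[loss_t],
    callbacks=callbacks,
    optimizer="novograd",
    optimization_params={"num_epochs": args.num_epochs, "lr": args.lr})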