print("Loading checkpoints...")
if pre_v09_model:
print(" Converting pre v0.9 checkpoint...")
ckpt = torch.load(nn_encoder)
new_ckpt = {}
for k, v in ckpt.items():
new_k = k.replace('.conv.', '.mconv.')
if len(v.shape) == 3:
new_k = new_k.replace('.weight', '.conv.weight')
new_ckpt[new_k] = v
jasper_encoder.load_state_dict(new_ckpt)
else:
jasper_encoder.restore_from(nn_encoder)
jasper_decoder.restore_from(nn_decoder)
nf = nemo.core.NeuralModuleFactory(create_tb_writer=False)
print("Exporting encoder...")
nf.deployment_export(jasper_encoder, nn_onnx_encoder,
nemo.core.neural_factory.DeploymentFormat.ONNX,
torch.zeros(batch_size,
num_encoder_input_features,
time_steps,
dtype=torch.float, device="cuda:0"))
print("Exporting decoder...")
nf.deployment_export(jasper_decoder, nn_onnx_decoder,
nemo.core.neural_factory.DeploymentFormat.ONNX,
(torch.zeros(batch_size,
num_decoder_input_features,
time_steps // 2,
dtype=torch.float, device="cuda:0")))
print("Export completed successfully.")
opt_level)
name = construct_name('ZeroDS-Jasper10x5', lr, batch_size, num_gpus, num_epochs,
                      weight_decay)
tb_writer = SummaryWriter(name)
if args.local_rank is not None:
    device = nemo.core.DeviceType.AllGpu
    print('Doing ALL GPU')
else:
    device = nemo.core.DeviceType.GPU
# instantiate Neural Factory with supported backend
neural_factory = nemo.core.NeuralModuleFactory(
    backend=nemo.core.Backend.PyTorch,
    local_rank=args.local_rank,
    optimization_level=opt_level,
    placement=device)
jasper_model_definition = toml.load("../../examples/nemo_asr/jasper10x5.toml")
jasper_model_definition['placement'] = device
labels = jasper_model_definition['labels']['labels']
# train_manifest = "/mnt/D1/Data/librispeech/librivox-train-all.json"
train_manifest = args.train_manifest
featurizer_config = jasper_model_definition['input']
data_preprocessor = neural_factory.get_module(
    name="AudioToMelSpectrogramPreprocessor",
    collection="nemo_asr",
# Copyright (c) 2019 NVIDIA Corporation
import nemo
from nemo.core import DeviceType
# instantiate Neural Factory with supported backend
# nf = nemo.core.NeuralModuleFactory(placement=DeviceType.CPU)
nf = nemo.core.NeuralModuleFactory()
# instantiate necessary neural modules
# RealFunctionDataLayer defaults to f=torch.sin, sampling from x=[-4, 4]
dl = nemo.tutorials.RealFunctionDataLayer(
    n=10000, batch_size=128)
fx = nemo.tutorials.TaylorNet(dim=4)
loss = nemo.tutorials.MSELoss()
# describe activation's flow
x, y = dl()
p = fx(x=x)
lss = loss(predictions=p, target=y)
# SimpleLossLoggerCallback will print loss values to console.
callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[lss],
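    # continuation sketch: the snippet above breaks off inside this call; the
    # print_func and the training settings below are illustrative assumptions
    # in the style of the NeMo 0.x "hello world" tutorial, not copied from the
    # original file
    print_func=lambda x: print(f'Train Loss: {str(x[0].item())}'))

# invoke the "train" action on the loss tensor (learning rate and epoch count
# are placeholders)
nf.train([lss], callbacks=[callback],
         optimizer="sgd",
         optimization_params={"num_epochs": 3, "lr": 0.0003})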
def main():
    args = parse_args()
    neural_factory = nemo.core.NeuralModuleFactory(
        optimization_level=args.amp_opt_level,
        backend=nemo.core.Backend.PyTorch)

    # Create text to spectrogram model
    if args.spec_model == "tacotron2":
        yaml = YAML(typ="safe")
        with open(args.spec_model_config) as file:
            tacotron2_params = yaml.load(file)
        spec_neural_modules = create_NMs(tacotron2_params, decoder_infer=True)
        infer_tensors = create_infer_dags(
            neural_factory=neural_factory,
            neural_modules=spec_neural_modules,
            tacotron2_params=tacotron2_params,
            infer_dataset=args.eval_dataset,
            infer_batch_size=args.batch_size)
parser.add_argument("--label_smoothing", default=0.1, type=float)
parser.add_argument("--beam_size", default=4, type=int)
parser.add_argument("--tokenizer_model", default="vocab.txt", type=str)
parser.add_argument("--predict_last_k", default=16, type=int)
parser.add_argument("--save_epoch_freq", default=1, type=int)
parser.add_argument("--save_step_freq", default=-1, type=int)
parser.add_argument("--interactive", action="store_true")
args = parser.parse_args()
"""
To get the data, go to tests/data and run get_wt2.sh
Then run create_vocab.py
"""
work_dir = f'{args.work_dir}/{args.dataset_name.upper()}'
nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch,
                                   local_rank=args.local_rank,
                                   optimization_level=args.amp_opt_level,
                                   log_dir=args.work_dir,
                                   create_tb_writer=True,
                                   files_to_copy=[__file__])
data_desc = LanguageModelDataDesc(
    args.dataset_name, args.data_dir, args.do_lower_case)
# define tokenizer, in this example we use word-level tokenizer
# we also adjust the vocabulary size to make it multiple of 8 to accelerate
# training in fp16 mode with the use of Tensor Cores
tokenizer = nemo_nlp.WordTokenizer(f"{args.data_dir}/{args.tokenizer_model}")
vocab_size = 8 * math.ceil(tokenizer.vocab_size / 8)
# instantiate necessary modules for the whole translation pipeline, namely
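Aside on the vocabulary padding a few lines above: rounding up to a multiple of 8 is plain integer arithmetic, shown here with a made-up vocabulary size (the number is illustrative, not taken from the snippet).

import math

raw_vocab_size = 28785                           # hypothetical word-level vocab size
padded_size = 8 * math.ceil(raw_vocab_size / 8)  # -> 28792, divisible by 8
assert padded_size % 8 == 0 and padded_size >= raw_vocab_size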
parser.add_argument("--bert_checkpoint", default='BERT-EPOCH-1.pt', type=str)
parser.add_argument("--classifier_checkpoint",
default='TokenClassifier-EPOCH-1.pt', type=str)
parser.add_argument("--bert_config", default=None, type=str)
parser.add_argument("--work_dir", default='output_punct', type=str,
help="The output directory where the model predictions \
and checkpoints will be written.")
args = parser.parse_args()
args.interactive = True
args.bert_checkpoint = '/home/ebakhturina/output/punct/dataset_33_dr0.2_lr0.0001/checkpoints/BERT-EPOCH-9.pt'
args.classifier_checkpoint = '/home/ebakhturina/output/punct/dataset_33_dr0.2_lr0.0001/checkpoints/TokenClassifier-EPOCH-9.pt'
# Instantiate Neural Factory with supported backend
nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch,
log_dir=args.work_dir)
output_file = f'{nf.work_dir}/output.txt'
tokenizer = NemoBertTokenizer(args.pretrained_bert_model)
bert_model = nemo_nlp.huggingface.BERT(
    pretrained_model_name=args.pretrained_bert_model)
tag_ids = {'O': 0, ',': 3, '.': 2, '?': 1}
ids_to_tags = {tag_ids[k]: k for k in tag_ids}
num_labels = len(tag_ids)
hidden_size = bert_model.local_parameters["hidden_size"]
classifier = nemo_nlp.TokenClassifier(hidden_size=hidden_size,
                                      num_classes=args.num_classes,
                                      dropout=0)
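For context, the tag_ids / ids_to_tags dictionaries above map between classifier outputs and punctuation marks; the sketch below shows that mapping on a made-up logits array and is not part of the original script.

import numpy as np

# hypothetical per-token scores of shape [num_tokens, num_labels]
logits = np.random.randn(6, num_labels)
pred_ids = logits.argmax(axis=-1)                    # best tag id per token
pred_tags = [ids_to_tags[int(i)] for i in pred_ids]  # e.g. ['O', ',', 'O', ...]
print(pred_tags)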
parser.add_argument("--train_dataset",
# set default=os.getcwd() unless your are running test
default="/home/mrjenkins/TestData", type=str)
parser.add_argument("--amp_opt_level", choices=['O0', 'O1', 'O2', 'O3'],
default='O0')
args = parser.parse_args()
batch_size = args.batch_size
work_dir = f"GAN_{args.amp_opt_level}"
if args.work_dir:
    work_dir = os.path.join(args.work_dir, work_dir)
# instantiate Neural Factory with supported backend
neural_factory = nemo.core.NeuralModuleFactory(
    local_rank=args.local_rank,
    optimization_level=args.amp_opt_level,
    log_dir=work_dir,
    create_tb_writer=True,
    files_to_copy=[__file__]
)
mnist_data = nemo_simple_gan.MnistGanDataLayer(
    batch_size=batch_size,
    shuffle=True,
    train=True,
    root=args.train_dataset)
generator = nemo_simple_gan.SimpleGenerator(
    batch_size=batch_size)
discriminator = nemo_simple_gan.SimpleDiscriminator()
Prepare GLUE task
MNLI task has two separate dev sets: matched and mismatched
"""
if args.task_name == 'mnli':
    eval_task_names = ("mnli", "mnli-mm")
    task_processors = (processors["mnli"](), processors["mnli-mm"]())
else:
    eval_task_names = (args.task_name,)
    task_processors = (processors[args.task_name](),)
label_list = task_processors[0].get_labels()
num_labels = len(label_list)
output_mode = output_modes[args.task_name]
# Instantiate neural factory with supported backend
nf = nemo.core.NeuralModuleFactory(backend=nemo.core.Backend.PyTorch,
                                   local_rank=args.local_rank,
                                   optimization_level=args.amp_opt_level,
                                   log_dir=args.work_dir,
                                   create_tb_writer=True,
                                   files_to_copy=[__file__],
                                   add_time_to_log_dir=True)
if args.bert_checkpoint is None:
    """ Use this if you're using a standard BERT model.
    To see the list of pretrained models, call:
    nemo_nlp.huggingface.BERT.list_pretrained_models()
    """
    tokenizer = NemoBertTokenizer(args.pretrained_bert_model)
    model = nemo_nlp.huggingface.BERT(
        pretrained_model_name=args.pretrained_bert_model)
else:
def main():
    # Parse args
    args = parse_args()
    cfg = parse_cfg(args)
    name = construct_name(args, cfg)

    # instantiate Neural Factory with supported backend
    neural_factory = nemo.core.NeuralModuleFactory(
        backend=nemo.core.Backend.PyTorch,
        local_rank=args.local_rank,
        optimization_level=args.amp_opt_level,
        log_dir=name,
        checkpoint_dir=args.checkpoint_dir,
        create_tb_writer=args.create_tb_writer,
        files_to_copy=[args.model_config, __file__],
        cudnn_benchmark=args.cudnn_benchmark,
        tensorboard_dir=args.tensorboard_dir)

    logger = neural_factory.logger
    tb_writer = neural_factory.tb_writer
    args.checkpoint_dir = neural_factory.checkpoint_dir

    logger.info(f'Name:\n{name}')
    logger.info(f'Args to be passed to job #{args.local_rank}:')