import torch
import horovod.torch as hvd

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 100, 1)
        self.conv2 = torch.nn.Conv2d(100, 1, 1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        return x

model = Net()
opt = torch.optim.SGD(model.parameters(), lr=0.1)

# Passing only a subset of named_parameters must raise a ValueError.
try:
    hvd.DistributedOptimizer(opt,
                             named_parameters=list(model.named_parameters())[0:1])
    assert False, 'hvd.DistributedOptimizer did not throw error'
except ValueError:
    pass
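# A minimal sketch of the accepted usage, not part of the original test: when
# named_parameters covers every parameter of the model, construction succeeds.
# Assumes hvd.init() has already been called.
opt = torch.optim.SGD(model.parameters(), lr=0.1)
opt = hvd.DistributedOptimizer(opt, named_parameters=model.named_parameters())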
# create model
model_config = config["model_config"]
model = lstm.LSTMAM(model_config["feat_dim"],
                    model_config["label_size"],
                    model_config["hidden_size"],
                    model_config["num_layers"],
                    model_config["dropout"],
                    True)
model.cuda()

# set up the optimizer
optimizer = th.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

# Broadcast parameters and optimizer state from rank 0 to all other processes.
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

# Add Horovod Distributed Optimizer
optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters())
if os.path.isfile(args.seed_model):
    checkpoint = th.load(args.seed_model)
    state_dict = checkpoint['model']
    model.load_state_dict(state_dict)
    print("=> loaded checkpoint '{}'".format(args.seed_model))
else:
    sys.stderr.write('ERROR: The model file %s does not exist!\n' % args.seed_model)
    sys.exit(1)

HCLG = args.den_dir + "/HCLG.fst"
words_txt = args.den_dir + "/words.txt"
silence_phones = args.den_dir + "/phones/silence.csl"

if not os.path.isfile(HCLG):
    sys.stderr.write('ERROR: The HCLG file %s does not exist!\n' % HCLG)
Returns:
    hvd.DistributedOptimizer: Optimizer to use for computing
    gradients and applying updates.
"""
# Horovod: scale learning rate by the number of GPUs.
optimizer = optim.Adam(model.parameters(), lr=learning_rate * hvd.size())

# Horovod: broadcast parameters & optimizer state.
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

# Horovod: (optional) compression algorithm.
compression = hvd.Compression.fp16

# Horovod: wrap optimizer with DistributedOptimizer.
optimizer = hvd.DistributedOptimizer(
    optimizer,
    named_parameters=model.named_parameters(),
    compression=compression,
)
return optimizer
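# A minimal preamble sketch, assumed rather than shown in the snippet above: the
# broadcast and DistributedOptimizer calls require Horovod to be initialized and
# each process pinned to a single GPU.
import torch
import horovod.torch as hvd

hvd.init()
if torch.cuda.is_available():
    torch.cuda.set_device(hvd.local_rank())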
# Start training
th.backends.cudnn.enabled = True
if th.cuda.is_available():
    model.cuda()

# optimizer
optimizer = th.optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)

if args.hvd:
    # Broadcast parameters and optimizer state from rank 0 to all other processes.
    hvd.broadcast_parameters(model.state_dict(), root_rank=0)
    hvd.broadcast_optimizer_state(optimizer, root_rank=0)

    # Add Horovod Distributed Optimizer
    optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters())
# criterion
criterion = nn.CrossEntropyLoss(ignore_index=-100)

start_epoch = 0
if args.resume_from_model:
    assert os.path.isfile(args.resume_from_model), \
        "ERROR: model file {} does not exist!".format(args.resume_from_model)
    checkpoint = th.load(args.resume_from_model)
    state_dict = checkpoint['model']
    start_epoch = checkpoint['epoch']
    model.load_state_dict(state_dict)
    optimizer.load_state_dict(checkpoint['optimizer'])
    print("=> loaded checkpoint '{}'".format(args.resume_from_model))
cudnn.benchmark = True

# Set up standard model.
model = getattr(models, args.model)()

if args.cuda:
    # Move model to GPU.
    model.cuda()

optimizer = optim.SGD(model.parameters(), lr=0.01)

# Horovod: (optional) compression algorithm.
compression = hvd.Compression.fp16 if args.fp16_allreduce else hvd.Compression.none

# Horovod: wrap optimizer with DistributedOptimizer.
optimizer = hvd.DistributedOptimizer(optimizer,
                                     named_parameters=model.named_parameters(),
                                     compression=compression)

# Horovod: broadcast parameters & optimizer state.
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

# Set up fixed fake data
data = torch.randn(args.batch_size, 3, 224, 224)
target = torch.LongTensor(args.batch_size).random_() % 1000
if args.cuda:
    data, target = data.cuda(), target.cuda()
def benchmark_step():
    optimizer.zero_grad()
    # Forward, loss, backward, and a distributed optimizer step on the fixed fake batch;
    # gradients are allreduced across workers inside optimizer.step().
    output = model(data)
    loss = torch.nn.functional.cross_entropy(output, target)
    loss.backward()
    optimizer.step()
no_decay = ["bias", "LayerNorm.weight"]  # assumed definition; the original snippet starts mid-list
optimizer_grouped_parameters = [
    {
        "params": [
            p
            for n, p in self.model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        "weight_decay": 0.01,
    },
    {
        "params": [
            p for n, p in self.model.named_parameters() if any(nd in n for nd in no_decay)
        ],
        "weight_decay": 0.0,
    },
]
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate, eps=1e-6)

if distributed:
    optimizer = hvd.DistributedOptimizer(
        optimizer,
        named_parameters=self.model.named_parameters(),
        backward_passes_per_step=gradient_accumulation_steps,
    )
    hvd.broadcast_parameters(self.model.state_dict(), root_rank=0)
    hvd.broadcast_optimizer_state(optimizer, root_rank=0)

if warmup_proportion:
    warmup_steps = t_total * warmup_proportion
else:
    warmup_steps = 0

scheduler = WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps, t_total=t_total)
global_step = 0
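# A hedged training-loop sketch, not part of the original snippet, showing why
# backward_passes_per_step is set to gradient_accumulation_steps above: Horovod
# accumulates gradients locally and only allreduces when optimizer.step() runs.
# `train_dataloader` and `compute_loss` are illustrative placeholders.
for step, batch in enumerate(train_dataloader):
    loss = compute_loss(self.model, batch) / gradient_accumulation_steps
    loss.backward()
    if (step + 1) % gradient_accumulation_steps == 0:
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        global_step += 1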
def build_torch(self, model):
    import torch
    lookup = {
        'sgd': torch.optim.SGD,
        'adadelta': torch.optim.Adadelta,
        'rmsprop': torch.optim.RMSprop,
        'adam': torch.optim.Adam,
    }
    if self.name not in lookup:
        logging.warning("No optimizer '{}' found, using SGD instead".format(self.name))
        self.name = 'sgd'
    opt = lookup[self.name](model.parameters(), **self.config)
    if self.horovod_wrapper:
        import horovod.torch as hvd
        opt = hvd.DistributedOptimizer(opt, named_parameters=model.named_parameters())
    return opt
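# A hypothetical usage sketch of the builder above; the OptimizerBuilder name and
# its constructor arguments are assumptions, only build_torch comes from the snippet.
builder = OptimizerBuilder(name='adam', config={'lr': 1e-3}, horovod_wrapper=True)
opt = builder.build_torch(model)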
class_weights = torch.tensor(
    config.DATASET.CLASS_WEIGHTS, device=device, requires_grad=False
)
criterion = torch.nn.CrossEntropyLoss(
    weight=class_weights, ignore_index=255, reduction="mean"
)

# Horovod: broadcast parameters & optimizer state.
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)

# Horovod: (optional) compression algorithm.
compression = hvd.Compression.fp16 if config.HOROVOD.FP16 else hvd.Compression.none

# Horovod: wrap optimizer with DistributedOptimizer.
optimizer = hvd.DistributedOptimizer(optimizer,
                                     named_parameters=model.named_parameters(),
                                     compression=compression)

# summary_writer = create_summary_writer(log_dir=config.LOG_DIR)
snapshot_duration = scheduler_step * len(train_loader)
warmup_duration = 5 * len(train_loader)

warmup_scheduler = LinearCyclicalScheduler(
    optimizer,
    "lr",
    start_value=config.TRAIN.MAX_LR,
    end_value=config.TRAIN.MAX_LR * world_size,
    cycle_size=10 * len(train_loader),
)
cosine_scheduler = CosineAnnealingScheduler(
    optimizer,
    "lr",