def __init__(self, local_optimizer=None, **kwargs):
    """
    Initializes a distributed horovod optimizer by wrapping a local optimizer.

    Args:
        local_optimizer (Optional[dict,LocalOptimizer]): The spec-dict for the wrapped LocalOptimizer object or
            a LocalOptimizer object itself.
    """
    super(HorovodOptimizer, self).__init__(**kwargs)
    # Create the horovod wrapper.
    wrapped_local_optimizer = Optimizer.from_spec(local_optimizer)
    self.local_optimizer = hvd.DistributedOptimizer(wrapped_local_optimizer)

@rlgraph_api
def step(self, variables, loss, time_percentage, *inputs):
    grads_and_vars = self._graph_fn_calculate_gradients(variables, loss, time_percentage, *inputs)
    return self._graph_fn_apply_gradients(grads_and_vars)
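This wrapper assumes Horovod has already been initialized in the process that builds the graph. A minimal sketch of that common boilerplate (standard Horovod TensorFlow API, not part of the snippet above):

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()  # start the Horovod/MPI context before wrapping any optimizer

# Pin each worker process to one local GPU so ranks do not collide.
session_config = tf.ConfigProto()
session_config.gpu_options.visible_device_list = str(hvd.local_rank())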
# Builds learning rate decay scheduler
static_lr = config_downstream.lr['static_lr']
num_train_steps = int(num_train_data / config_data.train_batch_size
                      * config_data.max_train_epoch)
num_warmup_steps = int(num_train_steps * config_data.warmup_proportion)
lr = model_utils.get_lr(global_step, num_train_steps,  # lr is a Tensor
                        num_warmup_steps, static_lr)

opt = tx.core.get_optimizer(
    global_step=global_step,
    learning_rate=lr,
    hparams=config_downstream.opt
)

if FLAGS.distributed:
    opt = hvd.DistributedOptimizer(opt)

train_op = tf.contrib.layers.optimize_loss(
    loss=loss,
    global_step=global_step,
    learning_rate=None,
    optimizer=opt)
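When FLAGS.distributed is enabled, each worker typically consumes a shard of the data, so the step budget computed above is often divided by the world size. A hedged sketch of that adjustment (the integer division is an assumed convention, not shown in the snippet):

if FLAGS.distributed:
    # With hvd.size() workers each consuming a shard of the batches,
    # shrink the per-worker step count accordingly (assumed convention).
    num_train_steps = num_train_steps // hvd.size()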
# Train/eval/test routine
def _is_head():
    if not FLAGS.distributed:
        return True
    return hvd.rank() == 0

def _train_epoch(sess):
    """Trains on the training set, and evaluates on the dev set
    """
# Losses & train ops
mle_loss = tx.losses.sequence_sparse_softmax_cross_entropy(
labels=targets,
logits=outputs.logits,
sequence_length=seq_lengths)
# Use global_step to pass epoch, for lr decay
global_step = tf.placeholder(tf.int32)
opt = tx.core.get_optimizer(
global_step=global_step,
hparams=config.opt
)
# 2. wrap the optimizer
opt = hvd.DistributedOptimizer(opt)
train_op = tx.core.get_train_op(
loss=mle_loss,
optimizer=opt,
global_step=global_step,
learning_rate=None,
increment_global_step=False,
hparams=config.opt
)
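Because global_step is a plain placeholder here, the caller has to feed the current epoch at run time so the decay schedule in config.opt can react to it. A hedged usage sketch (the surrounding epoch loop and variable names are assumptions):

# Inside the epoch loop: feed the epoch index through the `global_step`
# placeholder so the learning-rate decay configured in `config.opt` sees it.
loss_val, _ = sess.run([mle_loss, train_op], feed_dict={global_step: epoch})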
def _run_epoch(sess, data_iter, epoch, is_train=False, verbose=False):
    start_time = time.time()
    loss = 0.
    iters = 0

    fetches = {
tf.summary.scalar('top5_accuracy', top5_accuracy[1])

if mode == tf.estimator.ModeKeys.EVAL:
    metrics = {'top1_accuracy': top1_accuracy,
               'top5_accuracy': top5_accuracy}
    return tf.estimator.EstimatorSpec(
        mode, loss=loss, eval_metric_ops=metrics)

assert mode == tf.estimator.ModeKeys.TRAIN

# batch_size = inputs.shape[0]
batch_size = tf.shape(inputs)[0]

learning_rate = tf.train.polynomial_decay(
    learning_rate_init, tf.train.get_global_step(),
    decay_steps=decay_steps, end_learning_rate=0.,
    power=learning_rate_power, cycle=False, name='learning_rate')

opt = tf.train.MomentumOptimizer(
    learning_rate, momentum, use_nesterov=True)
opt = hvd.DistributedOptimizer(opt)
opt = nvutils.LarcOptimizer(opt, learning_rate, larc_eta, clip=larc_mode)
opt = nvutils.LossScalingOptimizer(opt, scale=loss_scale)

gate_gradients = (tf.train.Optimizer.GATE_OP if deterministic else
                  tf.train.Optimizer.GATE_NONE)
train_op = opt.minimize(
    loss, global_step=tf.train.get_global_step(),
    gate_gradients=gate_gradients, name='step_update')
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) or []
train_op = tf.group(train_op, update_ops)
return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
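A model_fn like the one above is usually driven by an Estimator whose train() call carries the Horovod broadcast hook. A sketch under that assumption; model_fn, model_dir, train_input_fn, and num_train_steps are placeholder names:

estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)
estimator.train(
    input_fn=train_input_fn,
    steps=num_train_steps,
    hooks=[hvd.BroadcastGlobalVariablesHook(0)])  # sync initial weights from rank 0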
def create_optimizer(trial):
    # We optimize the choice of optimizers as well as their parameters.
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'MomentumSGD'])
    if optimizer_name == 'Adam':
        adam_lr = trial.suggest_loguniform('adam_lr', 1e-5, 1e-1)
        optimizer = tf.contrib.opt.AdamWOptimizer(learning_rate=adam_lr, weight_decay=weight_decay)
    else:
        momentum_sgd_lr = trial.suggest_loguniform('momentum_sgd_lr', 1e-5, 1e-1)
        momentum = trial.suggest_loguniform('momentum', 1e-5, 1e-1)
        optimizer = tf.contrib.opt.MomentumWOptimizer(
            learning_rate=momentum_sgd_lr, momentum=momentum, weight_decay=weight_decay)
    return hvd.DistributedOptimizer(optimizer)
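A sketch of how the returned distributed optimizer might be consumed inside an Optuna objective; build_loss and run_training are hypothetical helpers standing in for the real study code:

def objective(trial):
    loss = build_loss()  # hypothetical model/loss construction
    optimizer = create_optimizer(trial)
    train_op = optimizer.minimize(
        loss, global_step=tf.train.get_or_create_global_step())
    return run_training(train_op, loss)  # hypothetical training loop returning the study metric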
if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

# Calculate Loss (for both TRAIN and EVAL modes)
onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10)
loss = tf.losses.softmax_cross_entropy(
    onehot_labels=onehot_labels, logits=logits)

# Configure the Training Op (for TRAIN mode)
if mode == tf.estimator.ModeKeys.TRAIN:
    # Horovod: scale learning rate by the number of workers.
    optimizer = tf.train.MomentumOptimizer(
        learning_rate=0.001 * hvd.size(), momentum=0.9)

    # Horovod: add Horovod Distributed Optimizer.
    optimizer = hvd.DistributedOptimizer(optimizer)

    train_op = optimizer.minimize(
        loss=loss,
        global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                      train_op=train_op)

# Add evaluation metrics (for EVAL mode)
eval_metric_ops = {
    "accuracy": tf.metrics.accuracy(
        labels=labels, predictions=predictions["classes"])}
return tf.estimator.EstimatorSpec(
    mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
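This model_fn follows the common Horovod Estimator pattern, where the driver pins one GPU per process, writes checkpoints only on rank 0, and adds the broadcast hook. A condensed sketch of such a driver; cnn_model_fn, train_input_fn, and the step count are assumptions:

hvd.init()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())

# Only rank 0 writes checkpoints, so workers do not clobber each other's files.
model_dir = './mnist_convnet_model' if hvd.rank() == 0 else None
classifier = tf.estimator.Estimator(
    model_fn=cnn_model_fn, model_dir=model_dir,
    config=tf.estimator.RunConfig(session_config=config))
classifier.train(
    input_fn=train_input_fn,
    steps=20000 // hvd.size(),  # fewer steps per worker as the world size grows
    hooks=[hvd.BroadcastGlobalVariablesHook(0)])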
with tf.variable_scope("Model", reuse=None):
with log.verbose_level(2):
m = get_model("resnet", config, **kwargs)
global_step = tf.get_variable(
"global_step", [],
initializer=tf.constant_initializer(0),
trainable=False,
dtype=tf.int64)
lr = tf.train.piecewise_constant(
global_step, config.learn_rate_decay_steps,
[config.learn_rate] + list(config.learn_rate_list))
m._lr = lr
m._global_step = global_step
opt = tf.train.MomentumOptimizer(lr, 0.9)
opt = hvd.DistributedOptimizer(opt)
hooks = [hvd.BroadcastGlobalVariablesHook(0)]
m._train_op = opt.minimize(m.cost, global_step=global_step, name="train_step")
tf.summary.scalar("train ce", m.cross_ent)
return m, hooks
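The returned hooks are intended for a monitored session, which runs the rank-0 broadcast when the session is created. A brief consumption sketch; save_folder and the stopping logic are assumptions:

checkpoint_dir = save_folder if hvd.rank() == 0 else None  # checkpoint only on rank 0
with tf.train.MonitoredTrainingSession(checkpoint_dir=checkpoint_dir,
                                       hooks=hooks) as sess:
    while not sess.should_stop():
        sess.run(m._train_op)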
logits = tf.contrib.layers.fully_connected(
    inputs=input_layer, num_outputs=num_classes,
    activation_fn=None)
predictions = tf.argmax(logits, axis=-1)
probs = tf.nn.softmax(logits)

loss, train_op = None, None
metrics = {}
if mode != tf.estimator.ModeKeys.PREDICT:
    label_lookup_table = tf.contrib.lookup.index_table_from_file(
        FLAGS.label_file, vocab_size=FLAGS.num_labels)
    labels = label_lookup_table.lookup(labels)
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits))
    opt = tf.train.AdamOptimizer(params["learning_rate"])
    if FLAGS.horovod:
        opt = hvd.DistributedOptimizer(opt)
    train_op = opt.minimize(loss, global_step=tf.train.get_global_step())
    metrics = {
        "accuracy": tf.metrics.accuracy(labels, predictions)
    }

exports = {}
if FLAGS.export_dir:
    exports = Exports(probs, text_embedding)

return tf.estimator.EstimatorSpec(
    mode, predictions=predictions, loss=loss, train_op=train_op,
    eval_metric_ops=metrics, export_outputs=exports)
session_config = tf.ConfigProto(
last_paf = net.last_paf # net output
confs_ = net.confs # GT
pafs_ = net.pafs # GT
mask = net.m1 # mask1, GT
# net.m2 = m2 # mask2, GT
stage_losses = net.stage_losses
l2_loss = net.l2_loss
global_step = tf.Variable(1, trainable=False)
# scaled_lr = lr_init * hvd.size() # Horovod: scale the learning rate linearly
scaled_lr = lr_init # Linear scaling rule is not working in openpose training.
with tf.variable_scope('learning_rate'):
    lr_v = tf.Variable(scaled_lr, trainable=False)

opt = tf.train.MomentumOptimizer(lr_v, 0.9)
opt = hvd.DistributedOptimizer(opt)  # Horovod
train_op = opt.minimize(total_loss, global_step=global_step)
config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
config.gpu_options.allow_growth = True # Horovod
config.gpu_options.visible_device_list = str(hvd.local_rank()) # Horovod
# Add variable initializer.
init = tf.global_variables_initializer()
# Horovod: broadcast initial variable states from rank 0 to all other processes.
# This is necessary to ensure consistent initialization of all workers when
# training is started with random weights or restored from a checkpoint.
bcast = hvd.broadcast_global_variables(0) # Horovod
# Horovod: adjust number of steps based on number of GPUs.
global n_step, lr_decay_every_step
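A hedged sketch of how the broadcast op and the step adjustment mentioned above are typically wired together; config and train_op refer to objects defined elsewhere in the script, and the integer division is an assumed convention:

n_step = n_step // hvd.size()                          # fewer steps per worker
lr_decay_every_step = lr_decay_every_step // hvd.size()

with tf.Session(config=config) as sess:
    sess.run(init)
    sess.run(bcast)  # sync the freshly initialized variables from rank 0
    for step in range(n_step):
        sess.run(train_op)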