    model.compile(
        loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"]
    )
    history = model.fit(
        x_train, y_train, batch_size=16, epochs=5, validation_split=0.2, callbacks=[hook]
    )
    test_scores = model.evaluate(x_test, y_test, verbose=2, callbacks=[hook])
else:
    model.compile(
        loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"]
    )
    history = model.fit(x_train, y_train, batch_size=16, epochs=5, validation_split=0.2)
    test_scores = model.evaluate(x_test, y_test, verbose=2)
# Check that hook created and tensors saved
trial = smd.create_trial(path=sim.out_dir)
assert smd.get_hook() is not None, "Hook was not created."
assert len(trial.steps()) > 0, "Nothing saved at any step."
assert len(trial.tensor_names()) > 0, "Tensors were not saved."
assert len(trial.tensor_names(collection="gradients")) > 0
if not tf_optimizer:
    # optimizer variables are currently saved only for Keras optimizers
    assert len(trial.tensor_names(collection="optimizer_variables")) > 0
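# Sketch (illustrative, not part of the original test): once the assertions
# above pass, saved tensor values can be read back from the trial by name
# and step via the smdebug Trial API.
for tname in trial.tensor_names(collection="gradients"):
    last_step = trial.tensor(tname).steps()[-1]
    print(tname, trial.tensor(tname).value(last_step))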
# Use multiple GPUs via MirroredStrategy.
# All available GPUs will be used if `num_gpus` is omitted.
# if num_devices > 1:
distribution = tf.contrib.distribute.MirroredStrategy()
# print("### Doing Multi GPU Training")
# else:
# distribution = None
# Pass to RunConfig
config = tf.estimator.RunConfig(
    train_distribute=distribution,
    eval_distribute=distribution if eval_distributed else None,
    model_dir="/tmp/mnist_convnet_model",
)
if save_config is None:
    save_config = smd.SaveConfig(save_interval=2)
if include_collections is None:
    include_collections = [
        CollectionKeys.WEIGHTS,
        CollectionKeys.BIASES,
        CollectionKeys.GRADIENTS,
        CollectionKeys.LOSSES,
    ]
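# (CollectionKeys here would come from smdebug.core.collection in the
# smdebug package; its members name the built-in tensor collections.)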
if not zcc:
    ts_hook = smd.SessionHook(
        out_dir=trial_dir,
        save_all=save_all,
        include_collections=include_collections,
        save_config=save_config,
        reduction_config=reduction_config,
    )

# The excerpt below is the tail of the JSON hook configuration consumed by
# the simulator test that follows (the leading fields of the JSON are elided):
json_file_contents = """
{
    ...
    "CollectionConfigurations": [
        {
            "CollectionName": "weights",
            "CollectionParameters": null
        },
        {
            "CollectionName": "losses",
            "CollectionParameters": null
        }
    ],
    "DebugHookSpecification": null
}
"""
with SagemakerSimulator(json_file_contents=json_file_contents) as sim:
    smd.del_hook()
    hook = smd.get_hook(hook_type="session", create_if_not_exists=True)
    print(hook)
    assert "weights" in hook.include_collections, hook
# Horovod: BroadcastGlobalVariablesHook broadcasts initial variable states from
# rank 0 to all other processes. This is necessary to ensure consistent
# initialization of all workers when training is started with random weights or
# restored from a checkpoint.
bcast_hook = hvd.BroadcastGlobalVariablesHook(0)
# Train the model
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": train_data}, y=train_labels, batch_size=100, num_epochs=None, shuffle=True
)
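# The matching evaluation input function (standard tf.estimator tutorial
# pattern; assumes `eval_data` / `eval_labels` arrays exist alongside the
# training arrays above):
eval_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": eval_data}, y=eval_labels, num_epochs=1, shuffle=False
)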
# Set up the Tornasole hook
# save tensors as reductions if necessary
rdnc = (
    smd.ReductionConfig(reductions=["mean"], abs_reductions=["max"], norms=["l1"])
    if FLAGS.reductions
    else None
)
ts_hook = smd.SessionHook(
    out_dir=FLAGS.smdebug_path,
    save_all=FLAGS.save_all,
    include_collections=["weights", "gradients", "losses", "biases"],
    save_config=smd.SaveConfig(save_interval=FLAGS.save_frequency),
    reduction_config=rdnc,
)
ts_hook.set_mode(smd.modes.TRAIN)
# Horovod: adjust number of steps based on number of GPUs.
# (the call's arguments were truncated in this excerpt; reconstructed below,
# with `FLAGS.num_steps` as an assumed flag)
mnist_classifier.train(
    input_fn=train_input_fn,
    steps=FLAGS.num_steps // hvd.size(),
    hooks=[bcast_hook, ts_hook],
)
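# Sketch of the corresponding evaluation step (illustrative): switching the
# hook to EVAL mode keeps evaluation steps separate from training steps in
# the saved data.
ts_hook.set_mode(smd.modes.EVAL)
mnist_classifier.evaluate(input_fn=eval_input_fn, hooks=[ts_hook])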
# Generate predictions (for PREDICT and EVAL mode)
predictions = {
    "classes": tf.argmax(input=logits, axis=1),
    # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
    # `logging_hook`.
    "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
}
if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

# Calculate Loss (for both TRAIN and EVAL modes)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

# Configure the Training Op (for TRAIN mode)
if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    optimizer = smd.get_hook().wrap_optimizer(optimizer)
    train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

# Add evaluation metrics (for EVAL mode)
eval_metric_ops = {
    "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])
}
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def main():
    parser = argparse.ArgumentParser(description="Train resnet50 cifar10")
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--epoch", type=int, default=3)
    parser.add_argument("--model_dir", type=str, default="./model_keras_resnet")
    opt = parser.parse_args()
    model = ResNet50(weights=None, input_shape=(32, 32, 3), classes=10)

    ##### Enabling SageMaker Debugger ###########
    # Create hook from the configuration provided through sagemaker python sdk
    hook = smd.KerasHook.create_from_json_file()

    optimizer = tf.keras.optimizers.Adam()

    ##### Enabling SageMaker Debugger ###########
    # wrap the optimizer so the hook can identify the gradients
    optimizer = hook.wrap_optimizer(optimizer)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    train(opt.batch_size, opt.epoch, model, hook)
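# Hedged sketch of the `train` helper used above (not shown in the excerpt).
# Assumptions: CIFAR-10 via tf.keras.datasets and one-hot labels, to match the
# categorical_crossentropy loss; the smdebug hook is passed to fit() as a
# Keras callback so it can intercept training.
def train(batch_size, epoch, model, hook):
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    y_train = tf.keras.utils.to_categorical(y_train, 10)
    y_test = tf.keras.utils.to_categorical(y_test, 10)
    model.fit(
        x_train,
        y_train,
        batch_size=batch_size,
        epochs=epoch,
        validation_data=(x_test, y_test),
        callbacks=[hook],
    )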
"--num_eval_steps",
type=int,
help="Number of steps to evaluate for. If this"
"is passed, it doesnt evaluate over the full eval set",
)
parser.add_argument("--model_dir", type=str, default="/tmp/mnist_model")
args = parser.parse_args()
if args.random_seed:
    tf.set_random_seed(2)
    np.random.seed(2)
    random.seed(12)
##### Enabling SageMaker Debugger ###########
# creating hook
hook = smd.EstimatorHook(
    out_dir=args.out_dir,
    include_collections=["weights", "gradients"],
    save_config=smd.SaveConfig(save_interval=args.save_interval),
)
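# Hedged usage sketch: an EstimatorHook is a tf.train.SessionRunHook, so it is
# passed to train()/evaluate() like any other hook; `mnist_classifier`,
# `train_input_fn`, and the step count are assumed, as in the surrounding
# examples.
hook.set_mode(smd.modes.TRAIN)
mnist_classifier.train(input_fn=train_input_fn, steps=1000, hooks=[hook])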
def cnn_model_fn(features, labels, mode):
    """Model function for CNN."""
    # Input Layer
    input_layer = tf.reshape(features["x"], [-1, 28, 28, 1])

    # Convolutional Layer #1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu,  # (completed; the excerpt truncated this call)
    )
# Use multiple GPUs via MirroredStrategy.
# All available GPUs will be used if `num_gpus` is omitted.
if num_gpus > 1:
    distribution = tf.contrib.distribute.MirroredStrategy(num_gpus=num_gpus)
    print("### Doing Multi GPU Training")
else:
    distribution = None
# Pass to RunConfig
config = tf.estimator.RunConfig(
    train_distribute=distribution, model_dir="/tmp/mnist_convnet_model"
)
# save tensors as reductions if necessary
rdnc = (
    smd.ReductionConfig(reductions=["mean"], abs_reductions=["max"], norms=["l1"])
    if FLAGS.reductions
    else None
)
ts_hook = smd.SessionHook(
    out_dir=FLAGS.smdebug_path,
    save_all=FLAGS.save_all,
    include_collections=["weights", "gradients", "losses", "biases"],
    save_config=smd.SaveConfig(save_interval=FLAGS.save_frequency),
    reduction_config=rdnc,
)
ts_hook.set_mode(smd.modes.TRAIN)
# Create the Estimator
# pass RunConfig
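# Minimal sketch of the step the two comments above refer to: the RunConfig
# carries the distribution strategy into the Estimator.
mnist_classifier = tf.estimator.Estimator(model_fn=cnn_model_fn, config=config)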
def main():
    parser = argparse.ArgumentParser(description="Train resnet50 cifar10")
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--epoch", type=int, default=3)
    parser.add_argument("--model_dir", type=str, default="./model_keras_resnet")
    parser.add_argument("--out_dir", type=str)
    parser.add_argument("--save_interval", type=int, default=500)
    opt = parser.parse_args()
    model = ResNet50(weights=None, input_shape=(32, 32, 3), classes=10)

    ##### Enabling SageMaker Debugger ###########
    # creating hook
    hook = smd.KerasHook(
        out_dir=opt.out_dir,
        include_collections=["weights", "gradients", "losses"],
        save_config=smd.SaveConfig(save_interval=opt.save_interval),
    )
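    # Unlike the earlier example, which builds the hook from the
    # SageMaker-provided JSON via smd.KerasHook.create_from_json_file(), this
    # variant constructs the hook directly, so it also works outside of a
    # SageMaker training job.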
    optimizer = tf.keras.optimizers.Adam()

    ##### Enabling SageMaker Debugger ###########
    # wrap the optimizer so the hook can identify the gradients
    optimizer = hook.wrap_optimizer(optimizer)
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    # start the training.
    train(opt.batch_size, opt.epoch, model, hook)